Tanzania fertilizer use efficiency, profitability and risks across time

Author

Maxwell Mkondiwa (m.mkondiwa@cgiar.org) and Jordan Chamberlin (j.chamberlin@cgiar.org)

1 Introduction

This notebook provides R code for the paper on estimating the heterogenous treatment effects in Tanzania using non-parametric geoadditive and causal machine learning (ML) models. The analytics focus on addressing the following analysis questions:

2 Exploration

Code
# package names
packages <- c("gplots", "modelsummary", "grf", "policytree", "ggplot2", "micEcon", "frontier", "dplyr", "tidyr", "knitr", "car", "RColorBrewer", "DT", "rio", "tidyr", "dsfa", "mgcv", "geodata", "sf", "mapview", "dplyr", "terra", "raster", "ggridges", "rio", "BART", "BART", "BayesTree", "bartCause", "plm", "rlearner")

# install packages
# installed_packages <- packages %in% rownames(installed.packages())
# if (any(installed_packages == FALSE)) {
#     install.packages(packages[!installed_packages])
# }

# load packages
invisible(lapply(packages, library, character.only = TRUE))
# install.packages("collapse", repos = "https://fastverse.r-universe.dev")
library(rio)


#tz_lsms_panel <- import("tz_lsms_panel_plot_level_221117.dta")
tz_lsms_panel <- import("tz_lsms_panel.csv")

tz_lsms_panel$yld_[tz_lsms_panel$yld_ > quantile(tz_lsms_panel$yld_, 0.99, na.rm = TRUE)] <- NA
tz_lsms_panel$N_kgha[tz_lsms_panel$N_kgha > quantile(tz_lsms_panel$N_kgha, 0.99, na.rm = TRUE)] <- NA

tz_lsms_panel$plotha[tz_lsms_panel$plotha > quantile(tz_lsms_panel$plotha, 0.99, na.rm = TRUE)] <- NA

tz_lsms_panel$N_kgha_dum <- 0
tz_lsms_panel$N_kgha_dum[tz_lsms_panel$N_kgha > 0] <- 1

tz_lsms_panel$N_kgha_dum <- as.numeric(tz_lsms_panel$N_kgha_dum)

tz_lsms_panel$N_kgha_cond <- tz_lsms_panel$N_kgha
tz_lsms_panel$N_kgha_cond[tz_lsms_panel$N_kgha_dum == 0] <- NA

tz_lsms_panel$N_kgha_cond <- as.numeric(tz_lsms_panel$N_kgha_cond)


tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$yld_)))
tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$N_kgha)))
tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$plotha)))
tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$P_kgha)))
tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$harv_yld_mai)))

tz_lsms_panel$soc_5_15cm=tz_lsms_panel$`soc_5-15cm`
tz_lsms_panel$nitrogen_0_5cm=tz_lsms_panel$`nitrogen_0-5cm`
tz_lsms_panel$sand_0_5cm=tz_lsms_panel$`sand_0-5cm`
tz_lsms_panel$ECN_5_15cm=tz_lsms_panel$`ECN_5-15cm`
tz_lsms_panel$pH_0_5cm=tz_lsms_panel$`pH_0-5cm`

3 Descriptives table

Code
library(modelsummary)

mean_na <- function(x) mean(x, na.rm = TRUE)
sd_na <- function(x) SD(x, na.rm = TRUE)
summary_table_by_year <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_dum  +N_kgha_cond+ N_kgha + P_kgha+plotha + ncrops + hhmem + femhead + headeduc + arain_tot+arain_cv) * (mean_na + sd_na) ~ Factor(year), data = tz_lsms_panel, output = "data.frame")

summary_table_by_year_mean <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_dum  +N_kgha_cond+ N_kgha + P_kgha+plotha + ncrops + hhmem + femhead + headeduc + arain_tot+arain_cv) * (mean_na) ~ Factor(year), data = tz_lsms_panel, output = "data.frame")




library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('summary_table_by_year', 'summary_table_by_year.csv')"
        ),
        reactable(
            summary_table_by_year,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "summary_table_by_year"
        )
    )
)
Code
library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('summary_table_by_year_mean', 'summary_table_by_year_mean.csv')"
        ),
        reactable(
            summary_table_by_year_mean,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "summary_table_by_year_mean"
        )
    )
)
Code
# # By district and by year
# summary_table_by_year_dist <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_g_0  + N_kgha + plotha + P_kgha + ncrops + hhmem + femhead + headeduc + arain_tot) * (mean_na + sd_na) ~ Factor(year*district), data = tz_lsms_panel, output = "data.frame")
# 
# library(reactable)
# library(htmltools)
# library(fontawesome)
# 
# htmltools::browsable(
#     tagList(
#         tags$button(
#             tagList(fontawesome::fa("download"), "Download as CSV"),
#             onclick = "Reactable.downloadDataCSV('summary_table_by_year_dist', 'summary_table_by_year_dist.csv')"
#         ),
#         reactable(
#             summary_table_by_year,
#             searchable = TRUE,
#             defaultPageSize = 38,
#             elementId = "summary_table_by_year_dist"
#         )
#     )
# )


tz_lsms_panel$region=as.factor(tz_lsms_panel$region)
tz_lsms_panel$year=as.factor(tz_lsms_panel$year)

# By district and by year
summary_table_by_year_region <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_dum  +N_kgha_cond+ N_kgha + P_kgha+plotha+ ncrops + hhmem + femhead + headeduc + arain_tot)*(mean_na) ~ year*region, data = tz_lsms_panel, output = "data.frame")

summary_table_by_year_region_mean <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_dum  +N_kgha_cond+ N_kgha + P_kgha+plotha + ncrops + hhmem + femhead + headeduc + arain_tot)*(mean_na) ~ year*region, data = tz_lsms_panel, output = "data.frame")


summary_table_by_year_region_t=t(summary_table_by_year_region)
summary_table_by_year_region_mean_t=t(summary_table_by_year_region_mean)

library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('summary_table_by_year_region_t', 'summary_table_by_year_region_t.csv')"
        ),
        reactable(
            summary_table_by_year_region_t,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "summary_table_by_year_region_t"
        )
    )
)
Code
library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('summary_table_by_year_region_mean_t', 'summary_table_by_year_region_mean_t.csv')"
        ),
        reactable(
            summary_table_by_year_region_mean_t,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "summary_table_by_year_region_mean_t"
        )
    )
)

4 Graphics

Code
library(gplots)
plotmeans(yld_ ~ year, main="Yield heterogeineity across years",xlab="Year", ylab="Maize yield (kg/ha)", data=tz_lsms_panel)

Code
library(gplots)
plotmeans(N_kgha ~ year, main="N per ha heterogeineity across years", data=tz_lsms_panel,xlab="Year", ylab="Average N per ha" )

Code
plotmeans(arain_tot~ year, main="Rainfall across years", data=tz_lsms_panel,xlab="Year", ylab="Average total rainfall (mm)" )

Code
library(easynls)
library(lme4)
library(data.table)
library(ggplot2)

# summary_table_by_year_mean_t=t(summary_table_by_year_mean)
# 
# ggplot(summary_table_by_year_mean) +
#   geom_line(aes(x=N, y=Y, group=year, color=year))
# 
# ggplot(summary_table_by_year_region) +
#   geom_line(aes(x=N, y=Y, group=region, color=region))

5 Conventional Production Function Approach

5.1 Linear and non-linear parameter models: e.g., Quadratic

Code
library(data.table)
library(ggplot2)
library(easynls)
library(lme4)
library(nlme)

tz_lsms_panel=data.table(tz_lsms_panel)
tz_lsms_panel$year=as.factor(tz_lsms_panel$year)

#tz_lsms_panel_sf_subset=subset(tz_lsms_panel_sf, #select=c("V1","yld_","harv_yld_mai","N_kgha","P_kgha","year","soc_5-15cm","population_density","wc2.1_30s_elev","#sand_0-5cm","nitrogen_0-5cm","ECN_5-15cm","pH_0-5cm"))                                                      

#library(tidyr)
#tz_lsms_panel_sf_subset=tz_lsms_panel_sf_subset %>% drop_na()

# Linear
baseline_ols=lm(yld_~N_kgha+plotha+P_kgha+ncrops+hhmem+femhead+headeduc+arain_tot+region+year,data=tz_lsms_panel)

summary(baseline_ols)

Call:
lm(formula = yld_ ~ N_kgha + plotha + P_kgha + ncrops + hhmem + 
    femhead + headeduc + arain_tot + region + year, data = tz_lsms_panel)

Residuals:
    Min      1Q  Median      3Q     Max 
-2276.9  -440.5  -178.2   201.4  5410.4 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  7.339e+02  5.672e+01  12.937  < 2e-16 ***
N_kgha       8.740e+00  4.734e-01  18.463  < 2e-16 ***
plotha      -1.358e+02  6.983e+00 -19.450  < 2e-16 ***
P_kgha       5.709e+00  7.465e-01   7.647 2.26e-14 ***
ncrops      -5.799e+01  9.087e+00  -6.382 1.83e-10 ***
hhmem        1.498e+01  2.681e+00   5.590 2.34e-08 ***
femhead     -8.102e+01  1.881e+01  -4.306 1.68e-05 ***
headeduc     5.489e+01  1.269e+01   4.326 1.53e-05 ***
arain_tot   -9.941e-03  2.352e-02  -0.423 0.672523    
region2      3.340e+02  5.912e+01   5.649 1.66e-08 ***
region3      1.898e+02  5.912e+01   3.210 0.001330 ** 
region4      1.639e+02  5.139e+01   3.190 0.001427 ** 
region5      9.571e+01  5.310e+01   1.802 0.071506 .  
region6     -2.386e+02  8.462e+01  -2.819 0.004823 ** 
region7      7.442e+01  1.028e+02   0.724 0.469276    
region8     -1.022e+02  5.462e+01  -1.870 0.061448 .  
region9     -1.677e+02  4.873e+01  -3.441 0.000583 ***
region10     5.772e+01  5.082e+01   1.136 0.256121    
region11     2.204e+02  4.854e+01   4.541 5.67e-06 ***
region12     3.337e+02  4.719e+01   7.073 1.63e-12 ***
region13     1.397e+02  6.147e+01   2.272 0.023080 *  
region14    -1.022e+00  4.787e+01  -0.021 0.982975    
region15     4.573e+02  5.585e+01   8.189 2.99e-16 ***
region16    -3.991e+01  5.644e+01  -0.707 0.479530    
region17    -3.052e+01  5.167e+01  -0.591 0.554778    
region18    -1.510e+02  6.044e+01  -2.498 0.012512 *  
region19    -1.064e+02  5.697e+01  -1.869 0.061714 .  
region20    -2.175e+01  7.066e+01  -0.308 0.758166    
region21     4.673e+02  6.363e+01   7.344 2.24e-13 ***
region22     1.517e+01  8.852e+01   0.171 0.863907    
region23     4.757e+02  8.977e+01   5.299 1.19e-07 ***
region24     9.621e+01  6.453e+01   1.491 0.136034    
region25     1.258e+02  8.649e+01   1.454 0.145846    
region26     3.356e+02  1.713e+02   1.960 0.050049 .  
region51    -9.463e+01  2.206e+02  -0.429 0.668021    
region52    -1.786e+02  3.095e+02  -0.577 0.563956    
region53     9.781e+02  5.327e+02   1.836 0.066384 .  
region54    -9.700e+01  1.732e+02  -0.560 0.575529    
region55    -4.864e+02  2.869e+02  -1.695 0.090023 .  
year2010     5.237e+01  2.602e+01   2.013 0.044176 *  
year2012     2.558e+01  2.514e+01   1.017 0.308957    
year2014     2.145e+02  2.796e+01   7.671 1.87e-14 ***
year2020     1.759e+02  2.990e+01   5.885 4.13e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 751 on 9318 degrees of freedom
  (65 observations deleted due to missingness)
Multiple R-squared:  0.1938,    Adjusted R-squared:  0.1902 
F-statistic: 53.33 on 42 and 9318 DF,  p-value: < 2.2e-16
Code
# Lm List by year
baseline_ols_yearList <- lmList(yld_~N_kgha+plotha+P_kgha+ncrops+hhmem+femhead+headeduc+arain_tot|year,tz_lsms_panel)

baseline_ols_yearList_est <- coef(baseline_ols_yearList)
baseline_ols_yearList_est <-t(as.data.frame(baseline_ols_yearList_est))
order_tab <-rep(1,9)
order_tab <- as.data.frame(order_tab)
baseline_ols_yearList_est_dt=cbind(baseline_ols_yearList_est,order_tab)


baseline_ols_yearList_ci <- confint(baseline_ols_yearList)
baseline_ols_yearList_ci_dt <-t(as.data.frame(baseline_ols_yearList_ci))
order_tab <- rep(2:3,9)
order_tab <- as.data.frame(order_tab)

baseline_ols_yearList_ci_dt=cbind(baseline_ols_yearList_ci_dt,order_tab)

baseline_ols_yearList_est_ci_dt=bind_rows(baseline_ols_yearList_est_dt,baseline_ols_yearList_ci_dt)

write.csv(baseline_ols_yearList_est_ci_dt,"tables/baseline_ols_yearList_est_ci_dt.csv")


library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('baseline_ols_yearList_est_ci_dt', 'baseline_ols_yearList_est_ci_dt.csv')"
        ),
        reactable(
            baseline_ols_yearList_est_ci_dt,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "baseline_ols_yearList_est_ci_dt"
        )
    )
)
Code
(ci <- confint(baseline_ols_yearList))
An object of class "lmList4.confint"
, , (Intercept)

        2.5 %    97.5 %
2008 753.6952 1108.2260
2010 693.6715 1019.4392
2012 587.1459  833.0646
2014 843.9498 1152.8395
2020 930.4129 1245.5588

, , N_kgha

         2.5 %    97.5 %
2008  6.804201 10.533569
2010  5.135811  9.041654
2012  9.739294 13.283728
2014 10.776845 15.272801
2020  7.820350 12.099934

, , plotha

         2.5 %     97.5 %
2008 -160.8936  -87.46432
2010 -204.6546 -138.66503
2012 -152.5085 -103.62371
2014 -133.6674  -69.94889
2020 -148.1365  -86.64075

, , P_kgha

         2.5 %    97.5 %
2008 10.791759 20.100039
2010  6.328560 13.038997
2012 -2.355263  2.685182
2014  3.618337 11.292289
2020  4.595312 11.268829

, , ncrops

          2.5 %      97.5 %
2008 -139.15730 -58.1379880
2010 -131.95260 -46.4961685
2012  -67.49843   0.8182895
2014 -146.46833 -66.4786011
2020 -116.89111 -31.8827618

, , hhmem

         2.5 %   97.5 %
2008 3.8924144 30.85386
2010 6.0049495 29.81852
2012 0.1587785 18.57740
2014 2.3759461 27.57765
2020 1.0715666 23.32364

, , femhead

         2.5 %     97.5 %
2008 -172.0813   4.242785
2010 -165.9178   3.368091
2012 -117.3159  25.918380
2014 -199.0045 -15.986792
2020 -199.0741 -26.895477

, , headeduc

           2.5 %    97.5 %
2008  -0.6231079 134.78593
2010 -43.0080862  84.66444
2012  -7.4391648  85.21241
2014   5.6864344 121.12947
2020 -16.8345142  91.92433

, , arain_tot

           2.5 %     97.5 %
2008 -0.18387466 0.09048751
2010 -0.01880891 0.23780928
2012  0.01570261 0.19941373
2014 -0.05366435 0.14096787
2020 -0.13928780 0.01281851
Code
plot(ci,cex=2, cex.lab=3, xlab="Maize yield (kg/ha) response", ylab="Year")

Code
# Visreg
library(visreg)

tz_lsms_panel_2008 <- subset(tz_lsms_panel,tz_lsms_panel$year=="2008")
tz_lsms_panel_2010 <- subset(tz_lsms_panel,tz_lsms_panel$year=="2010")
tz_lsms_panel_2012 <- subset(tz_lsms_panel,tz_lsms_panel$year=="2012")
tz_lsms_panel_2014 <- subset(tz_lsms_panel,tz_lsms_panel$year=="2014")
tz_lsms_panel_2020 <- subset(tz_lsms_panel,tz_lsms_panel$year=="2020")

baseline_ols_2008 <- lm(yld_~N_kgha+plotha+P_kgha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2008)

baseline_ols_2010 <- lm(yld_~N_kgha+plotha+P_kgha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2010)

baseline_ols_2012 <- lm(yld_~N_kgha+plotha+P_kgha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2012)

baseline_ols_2014 <- lm(yld_~N_kgha+plotha+P_kgha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2014)

baseline_ols_2020 <- lm(yld_~N_kgha+plotha+P_kgha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2020)

baseline_ols_2008_v <-visreg(baseline_ols_2008, "N_kgha", plot=FALSE)
baseline_ols_2010_v <-visreg(baseline_ols_2010, "N_kgha", plot=FALSE)
baseline_ols_2012_v <-visreg(baseline_ols_2012, "N_kgha", plot=FALSE)
baseline_ols_2014_v <-visreg(baseline_ols_2014, "N_kgha", plot=FALSE)
baseline_ols_2020_v <-visreg(baseline_ols_2020, "N_kgha", plot=FALSE)


baseline_ols_all_plot <- visregList(baseline_ols_2008_v,baseline_ols_2010_v,baseline_ols_2012_v,baseline_ols_2014_v,baseline_ols_2020_v, collapse=TRUE,
                  labels=c("2008","2010","2012","2014","2020"))
op <- par(mfrow=c(1,5))
par(op)

plot(baseline_ols_all_plot,ylab="Maize yield (kg per ha)",xlab="Applied nitrogen (kg per ha)", ylim=c(0,3000), main="Linear",partial=FALSE,rug=FALSE)

Code
## LINEAR PLATEAU
# library(easynls)
# library(data.table)
# tz_lsms_panel <- data.table(tz_lsms_panel)
# tz_lsms_panel.temp <- tz_lsms_panel[, .("yld_","N_kgha")]
# nls.LP <- nlsfit(tz_lsms_panel.temp, model = 3)
# nls.LP$Model
# lp_model=nls.LP$Parameters
# lp_model
# summary(lp_model)

# QUADRATIC

baseline_quad=lm(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot+region+year,data=tz_lsms_panel)

summary(baseline_quad)

Call:
lm(formula = yld_ ~ N_kgha + I(N_kgha^2) + P_kgha + I(P_kgha^2) + 
    plotha + ncrops + hhmem + femhead + headeduc + arain_tot + 
    region + year, data = tz_lsms_panel)

Residuals:
    Min      1Q  Median      3Q     Max 
-2389.1  -439.4  -179.0   203.8  5406.2 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  7.325e+02  5.666e+01  12.928  < 2e-16 ***
N_kgha       4.423e+00  1.306e+00   3.385 0.000713 ***
I(N_kgha^2)  4.949e-02  1.470e-02   3.367 0.000763 ***
P_kgha       1.094e+01  1.373e+00   7.969 1.78e-15 ***
I(P_kgha^2) -4.156e-02  9.479e-03  -4.384 1.18e-05 ***
plotha      -1.353e+02  6.978e+00 -19.387  < 2e-16 ***
ncrops      -5.741e+01  9.077e+00  -6.325 2.65e-10 ***
hhmem        1.492e+01  2.677e+00   5.573 2.57e-08 ***
femhead     -8.131e+01  1.879e+01  -4.327 1.53e-05 ***
headeduc     5.550e+01  1.268e+01   4.378 1.21e-05 ***
arain_tot   -9.895e-03  2.349e-02  -0.421 0.673564    
region2      3.357e+02  5.905e+01   5.686 1.34e-08 ***
region3      2.043e+02  5.915e+01   3.454 0.000555 ***
region4      1.646e+02  5.133e+01   3.207 0.001347 ** 
region5      9.552e+01  5.303e+01   1.801 0.071696 .  
region6     -2.375e+02  8.451e+01  -2.811 0.004953 ** 
region7      7.094e+01  1.027e+02   0.691 0.489745    
region8     -1.020e+02  5.455e+01  -1.870 0.061488 .  
region9     -1.643e+02  4.867e+01  -3.375 0.000740 ***
region10     7.777e+01  5.105e+01   1.523 0.127720    
region11     2.355e+02  4.886e+01   4.820 1.46e-06 ***
region12     3.320e+02  4.732e+01   7.017 2.43e-12 ***
region13     1.442e+02  6.141e+01   2.349 0.018861 *  
region14     6.149e+00  4.788e+01   0.128 0.897820    
region15     4.597e+02  5.578e+01   8.241  < 2e-16 ***
region16    -3.761e+01  5.639e+01  -0.667 0.504791    
region17    -2.864e+01  5.160e+01  -0.555 0.578870    
region18    -1.489e+02  6.037e+01  -2.466 0.013672 *  
region19    -1.047e+02  5.689e+01  -1.841 0.065675 .  
region20    -2.094e+01  7.057e+01  -0.297 0.766618    
region21     4.676e+02  6.354e+01   7.358 2.02e-13 ***
region22     2.653e+01  8.873e+01   0.299 0.764949    
region23     4.824e+02  8.967e+01   5.380 7.65e-08 ***
region24     9.939e+01  6.445e+01   1.542 0.123071    
region25     1.260e+02  8.639e+01   1.458 0.144860    
region26     3.143e+02  1.711e+02   1.837 0.066268 .  
region51    -9.447e+01  2.204e+02  -0.429 0.668140    
region52    -1.776e+02  3.091e+02  -0.574 0.565656    
region53     9.799e+02  5.320e+02   1.842 0.065544 .  
region54    -9.773e+01  1.730e+02  -0.565 0.572163    
region55    -4.847e+02  2.865e+02  -1.692 0.090688 .  
year2010     5.492e+01  2.600e+01   2.112 0.034681 *  
year2012     2.679e+01  2.512e+01   1.067 0.286225    
year2014     2.118e+02  2.794e+01   7.583 3.70e-14 ***
year2020     1.729e+02  2.988e+01   5.785 7.50e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 750.1 on 9316 degrees of freedom
  (65 observations deleted due to missingness)
Multiple R-squared:  0.1961,    Adjusted R-squared:  0.1923 
F-statistic: 51.63 on 44 and 9316 DF,  p-value: < 2.2e-16
Code
library(modelsummary)

modelplot(baseline_quad)

Code
# Lm List by year
baseline_quad_lmList <- lmList(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot|year,tz_lsms_panel)

coef(baseline_quad_lmList )
     (Intercept)    N_kgha  I(N_kgha^2)    P_kgha I(P_kgha^2)    plotha
2008    941.2884  8.245976  0.005386832 31.060868 -0.18596358 -125.1292
2010    859.9147  4.099451  0.037900827 12.889122 -0.02254695 -170.5034
2012    707.3571 11.979181 -0.011235140  4.898355 -0.03068305 -128.5897
2014   1008.6227  6.679874  0.074984168  9.809642 -0.01960874 -101.5858
2020   1089.6014  6.361946  0.040491757 13.090728 -0.05669155 -117.3372
         ncrops     hhmem    femhead headeduc   arain_tot
2008  -98.66314 16.990854  -77.90737 64.89663 -0.05899779
2010  -88.92184 17.860252  -81.07230 22.47420  0.10802242
2012  -32.91387  9.316168  -44.65810 38.88373  0.10710486
2014 -106.06814 14.391925 -107.14145 66.54523  0.04239428
2020  -73.74805 12.240228 -113.17920 37.71171 -0.06338171
Code
(ci <- confint(baseline_quad_lmList ))
An object of class "lmList4.confint"
, , (Intercept)

        2.5 %    97.5 %
2008 764.5990 1117.9779
2010 696.8263 1023.0032
2012 583.8949  830.8193
2014 853.9373 1163.3080
2020 931.8077 1247.3951

, , N_kgha

          2.5 %    97.5 %
2008  2.9995373 13.492414
2010 -1.3862468  9.585148
2012  7.1794750 16.778886
2014  0.1162445 13.243503
2020  0.2718939 12.451998

, , I(N_kgha^2)

            2.5 %     97.5 %
2008 -0.052626628 0.06340029
2010 -0.030552341 0.10635400
2012 -0.070066087 0.04759581
2014  0.001795383 0.14817295
2020 -0.028307700 0.10929121

, , P_kgha

          2.5 %    97.5 %
2008 21.3512290 40.770507
2010  5.8539446 19.924298
2012  0.1744617  9.622248
2014  1.8860357 17.733248
2020  5.4949615 20.686494

, , I(P_kgha^2)

           2.5 %       97.5 %
2008 -0.28602385 -0.085903311
2010 -0.06665085  0.021556944
2012 -0.05652009 -0.004846009
2014 -0.08951536  0.050297883
2020 -0.13490524  0.021522135

, , plotha

         2.5 %     97.5 %
2008 -161.7082  -88.55014
2010 -203.5808 -137.42605
2012 -153.0684 -104.11096
2014 -133.4314  -69.74013
2020 -148.0832  -86.59112

, , ncrops

          2.5 %     97.5 %
2008 -139.04226 -58.284018
2010 -131.65563 -46.188048
2012  -67.04301   1.215271
2014 -146.04219 -66.094093
2020 -116.25594 -31.240167

, , hhmem

         2.5 %   97.5 %
2008 3.5593015 30.42241
2010 5.9490677 29.77144
2012 0.1064117 18.52593
2014 1.7842693 26.99958
2020 1.1083908 23.37207

, , femhead

         2.5 %     97.5 %
2008 -165.7955   9.980719
2010 -165.7184   3.573805
2012 -116.2459  26.929687
2014 -198.6871 -15.595800
2020 -199.2668 -27.091590

, , headeduc

          2.5 %    97.5 %
2008  -2.621147 132.41441
2010 -41.421413  86.36982
2012  -7.405629  85.17310
2014   8.764078 124.32639
2020 -16.737015  92.16044

, , arain_tot

           2.5 %     97.5 %
2008 -0.19584435 0.07784876
2010 -0.02030743 0.23635227
2012  0.01516811 0.19904162
2014 -0.05490576 0.13969432
2020 -0.13944422 0.01268080
Code
plot(ci,cex=2, cex.lab=3, xlab="Maize yield (kg/ha) response", ylab="Year")

Code
baseline_quad_lmList_est <- coef(baseline_quad_lmList)
baseline_quad_lmList_est <-t(as.data.frame(baseline_quad_lmList_est))
order_tab <-rep(1,11)
order_tab <- as.data.frame(order_tab)
baseline_quad_lmList_est_dt=cbind(baseline_quad_lmList_est,order_tab)


baseline_quad_lmList_ci <- confint(baseline_quad_lmList)
baseline_quad_lmList_ci_dt <-t(as.data.frame(baseline_quad_lmList_ci))
order_tab <- rep(2:3,11)
order_tab <- as.data.frame(order_tab)

baseline_quad_lmList_ci_dt=cbind(baseline_quad_lmList_ci_dt,order_tab)

baseline_quad_lmList_est_ci_dt=bind_rows(baseline_quad_lmList_est_dt,baseline_quad_lmList_ci_dt)

write.csv(baseline_quad_lmList_est_ci_dt,"tables/baseline_quad_lmList_est_ci_dt.csv")

## Visreg
library(visreg)

baseline_quad_2008 <- lm(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2008)

baseline_quad_2010 <- lm(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2010)

baseline_quad_2012 <- lm(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2012)

baseline_quad_2014 <- lm(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2014)

baseline_quad_2020 <- lm(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot,data=tz_lsms_panel_2020)

baseline_quad_2008_v <-visreg(baseline_quad_2008, "N_kgha", plot=FALSE)
baseline_quad_2010_v <-visreg(baseline_quad_2010, "N_kgha", plot=FALSE)
baseline_quad_2012_v <-visreg(baseline_quad_2012, "N_kgha", plot=FALSE)
baseline_quad_2014_v <-visreg(baseline_quad_2014, "N_kgha", plot=FALSE)
baseline_quad_2020_v <-visreg(baseline_quad_2020, "N_kgha", plot=FALSE)


baseline_quad_all_plot <- visregList(baseline_quad_2008_v,baseline_quad_2010_v,baseline_quad_2012_v,baseline_quad_2014_v,baseline_quad_2020_v, collapse=TRUE,
                  labels=c("2008","2010","2012","2014","2020"))
op <- par(mfrow=c(1,5))
par(op)

plot(baseline_quad_all_plot,ylab="Maize yield (kg per ha)",xlab="Applied nitrogen (kg per ha)", ylim=c(0,3000), main="Quadratic",partial=FALSE,rug=FALSE)

Code
## with interactionswith rainfall
# Lm List by year
baseline_quad_lmList_inter <- lmList(yld_~N_kgha*arain_tot+I(N_kgha^2)*arain_tot+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc|year,tz_lsms_panel)

coef(baseline_quad_lmList_inter )
     (Intercept)     N_kgha   arain_tot   I(N_kgha^2)    P_kgha I(P_kgha^2)
2008   1040.3094 -3.2871208 -0.15396132  0.0006057343 30.300815 -0.17871941
2010    868.0760 10.7063983  0.09833670 -0.1258864440 12.458721 -0.01876279
2012    711.3046 17.1146000  0.10177817 -0.1437490713  4.568688 -0.02970742
2014    990.9200  0.8538752  0.06162654  0.2314043400  9.427352 -0.02145016
2020   1071.3689  5.3956296 -0.05199694  0.1124736335 12.902975 -0.05850915
        plotha     ncrops     hhmem    femhead headeduc N_kgha:arain_tot
2008 -124.4275 -100.16794 16.606152  -79.26587 73.24403      0.011713061
2010 -169.9324  -88.84453 18.110322  -81.92280 22.51346     -0.006582208
2012 -128.6052  -32.53957  9.413228  -45.26470 36.95531     -0.004910076
2014 -102.1855 -106.24935 13.990182 -109.22771 64.35072      0.005414488
2020 -117.9740  -73.87108 12.356208 -113.30966 40.02040      0.001186827
     arain_tot:I(N_kgha^2)
2008         -4.848049e-06
2010          1.632493e-04
2012          1.360002e-04
2014         -1.436327e-04
2020         -5.970733e-05
Code
(ci <- confint(baseline_quad_lmList_inter))
An object of class "lmList4.confint"
, , (Intercept)

        2.5 %   97.5 %
2008 852.4817 1228.137
2010 700.6313 1035.521
2012 584.9592  837.650
2014 832.5991 1149.241
2020 909.0784 1233.659

, , N_kgha

         2.5 %   97.5 %
2008 -22.58665 16.01241
2010 -15.21231 36.62511
2012  -2.03690 36.26610
2014 -18.91251 20.62026
2020 -14.66245 25.45371

, , arain_tot

            2.5 %       97.5 %
2008 -0.305083526 -0.002839112
2010 -0.037862053  0.234535457
2012  0.006279456  0.197276888
2014 -0.040663828  0.163916900
2020 -0.132358090  0.028364209

, , I(N_kgha^2)

            2.5 %     97.5 %
2008 -0.206689083 0.20790055
2010 -0.465588497 0.21381561
2012 -0.379794694 0.09229655
2014  0.004867746 0.45794093
2020 -0.145042144 0.36998941

, , P_kgha

          2.5 %    97.5 %
2008 20.5937300 40.007900
2010  5.3874493 19.529993
2012 -0.2379535  9.375329
2014  1.4878176 17.366885
2020  5.2871211 20.518828

, , I(P_kgha^2)

           2.5 %       97.5 %
2008 -0.27866266 -0.078776158
2010 -0.06315567  0.025630083
2012 -0.05572036 -0.003694474
2014 -0.09145123  0.048550902
2020 -0.13683735  0.019819045

, , plotha

         2.5 %     97.5 %
2008 -160.9157  -87.93927
2010 -203.0181 -136.84679
2012 -153.0810 -104.12936
2014 -134.0176  -70.35336
2020 -148.7448  -87.20315

, , ncrops

          2.5 %     97.5 %
2008 -140.41995 -59.915927
2010 -131.58091 -46.108148
2012  -66.66599   1.586852
2014 -146.17043 -66.328268
2020 -116.38174 -31.360417

, , hhmem

         2.5 %   97.5 %
2008 3.2036531 30.00865
2010 6.1305553 30.09009
2012 0.2018068 18.62465
2014 1.3971106 26.58325
2020 1.2224836 23.48993

, , femhead

         2.5 %     97.5 %
2008 -166.8731   8.341404
2010 -166.5736   2.728009
2012 -116.8874  26.357960
2014 -200.7878 -17.667667
2020 -199.4210 -27.198271

, , headeduc

          2.5 %    97.5 %
2008   5.728716 140.75934
2010 -41.379710  86.40663
2012  -9.421060  83.33169
2014   6.556664 122.14477
2020 -14.525577  94.56638

, , N_kgha:arain_tot

            2.5 %     97.5 %
2008 -0.005974799 0.02940092
2010 -0.031679989 0.01851557
2012 -0.025564361 0.01574421
2014 -0.011467416 0.02229639
2020 -0.013061123 0.01543478

, , arain_tot:I(N_kgha^2)

             2.5 %       97.5 %
2008 -0.0001907917 1.810956e-04
2010 -0.0001679257 4.944243e-04
2012 -0.0001100940 3.820944e-04
2014 -0.0003376123 5.034694e-05
2020 -0.0002499725 1.305578e-04
Code
plot(ci,cex=2, cex.lab=3, xlab="Maize yield (kg/ha) response", ylab="Year")

Code
baseline_quad_lmList_inter_est <- coef(baseline_quad_lmList_inter)
baseline_quad_lmList_inter_est <-t(as.data.frame(baseline_quad_lmList_inter_est))
order_tab <-rep(1,13)
order_tab <- as.data.frame(order_tab)
baseline_quad_lmList_inter_est_dt=cbind(baseline_quad_lmList_inter_est,order_tab)


baseline_quad_lmList_inter_ci <- confint(baseline_quad_lmList_inter)
baseline_quad_lmList_inter_ci_dt <-t(as.data.frame(baseline_quad_lmList_inter_ci))
order_tab <- rep(2:3,13)
order_tab <- as.data.frame(order_tab)

baseline_quad_lmList_inter_ci_dt=cbind(baseline_quad_lmList_inter_ci_dt,order_tab)

baseline_quad_lmList_inter_est_ci_dt=bind_rows(baseline_quad_lmList_inter_est_dt,baseline_quad_lmList_inter_ci_dt)

write.csv(baseline_quad_lmList_inter_est_ci_dt,"tables/baseline_quad_lmList_inter_est_ci_dt.csv")


library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('baseline_quad_lmList_inter_est_ci_dt', 'baseline_quad_lmList_inter_est_ci_dt.csv')"
        ),
        reactable(
            baseline_quad_lmList_inter_est_ci_dt,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "baseline_quad_lmList_inter_est_ci_dt"
        )
    )
)
Code
## Visreg
library(visreg)

baseline_quad_inter_2008 <- lm(yld_~N_kgha*arain_tot+I(N_kgha^2)*arain_tot+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc,data=tz_lsms_panel_2008)

baseline_quad_inter_2010 <- lm(yld_~N_kgha*arain_tot+I(N_kgha^2)*arain_tot+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc,data=tz_lsms_panel_2010)

baseline_quad_inter_2012 <- lm(yld_~N_kgha*arain_tot+I(N_kgha^2)*arain_tot+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc,data=tz_lsms_panel_2012)

baseline_quad_inter_2014 <- lm(yld_~N_kgha*arain_tot+I(N_kgha^2)*arain_tot+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc,data=tz_lsms_panel_2014)

baseline_quad_inter_2020 <- lm(yld_~N_kgha*arain_tot+I(N_kgha^2)*arain_tot+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc,data=tz_lsms_panel_2020)

visreg2d(baseline_quad_inter_2008, "N_kgha", "arain_tot",ylab="Rainfall (mm)",xlab="Nitrogen (kg per ha)", main="2008")

Code
visreg2d(baseline_quad_inter_2010, "N_kgha", "arain_tot",ylab="Rainfall (mm)",xlab="Nitrogen (kg per ha)", main="2010")

Code
visreg2d(baseline_quad_inter_2012, "N_kgha", "arain_tot",ylab="Rainfall (mm)",xlab="Nitrogen (kg per ha)", main="2012")

Code
visreg2d(baseline_quad_inter_2014, "N_kgha", "arain_tot",ylab="Rainfall (mm)",xlab="Nitrogen (kg per ha)", main="2014")

Code
visreg2d(baseline_quad_inter_2020, "N_kgha", "arain_tot",ylab="Rainfall (mm)",xlab="Nitrogen (kg per ha)", main="2020")

Code
# Soil properties Interacted with soils 
baseline_quad_lmList_inter_soil <- lmList(yld_~N_kgha*soc_5_15cm+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot+population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm|year,tz_lsms_panel)

coef(baseline_quad_lmList_inter_soil )
     (Intercept)    N_kgha soc_5_15cm I(N_kgha^2)    P_kgha  I(P_kgha^2)
2008   332.82630  6.787078 -4.9803219 0.020246362 26.643060 -0.168656723
2010   485.15200  2.490868 -0.6060649 0.049132922 11.538145 -0.018545753
2012   813.23687 10.733636 11.4499568 0.001811815  3.819225 -0.027170118
2014  -472.32889 -1.419373 13.6039471 0.107457937  6.107798 -0.008539498
2020    18.36682  5.104535 -4.3053237 0.070230966  8.664753 -0.014308492
         plotha     ncrops     hhmem    femhead  headeduc    arain_tot
2008 -118.33338 -100.63247  8.295497  -98.12170 101.30778 -0.055494403
2010 -162.52023  -92.39225 14.729029  -81.51787  30.07025 -0.025783324
2012 -124.22897  -34.43923  5.802755  -34.63061  39.92241  0.070369306
2014  -93.33318 -110.03095 10.172188 -130.26257  72.46756  0.006581614
2020 -123.21524  -76.82603  7.173235 -142.97814  61.10092 -0.078886183
     population_density wc2.1_30s_elev sand_0_5cm nitrogen_0_5cm ECN_5_15cm
2008         0.01462737      0.3460713  -2.453726      19.316243   6.049551
2010        -0.01546993      0.2070832  -5.324435      15.825897   6.019511
2012         0.02330834      0.1946981  -3.019446    -148.946826   1.391007
2014         0.08828479      0.0941273  -1.437393    -110.782954  17.935999
2020         0.03451702      0.3564605  -1.811096       5.068627  18.702019
       pH_0_5cm N_kgha:soc_5_15cm
2008  76.855302       -0.08793186
2010  98.951817       -0.01743116
2012  -6.431865       -0.06007362
2014 240.528323        0.36432408
2020 143.783117       -0.10329985
Code
(ci <- confint(baseline_quad_lmList_inter_soil))
An object of class "lmList4.confint"
, , (Intercept)

          2.5 %    97.5 %
2008  -519.3404 1184.9930
2010  -388.7952 1359.0992
2012   113.6493 1512.8244
2014 -1418.5602  473.9024
2020  -876.5657  913.2993

, , N_kgha

           2.5 %    97.5 %
2008 -0.03331064 13.607466
2010 -4.55893340  9.540669
2012  4.60761413 16.859657
2014 -9.40878142  6.570036
2020 -1.62646364 11.835533

, , soc_5_15cm

           2.5 %    97.5 %
2008 -18.7467059  8.786062
2010 -13.7488881 12.536758
2012   0.7680558 22.131858
2014   1.2347875 25.973107
2020 -17.5931562  8.982509

, , I(N_kgha^2)

             2.5 %     97.5 %
2008 -0.0373248228 0.07781755
2010 -0.0186049820 0.11687083
2012 -0.0575565660 0.06118020
2014  0.0342209042 0.18069497
2020 -0.0005797513 0.14104168

, , P_kgha

          2.5 %    97.5 %
2008 17.0950427 36.191077
2010  4.5229776 18.553313
2012 -0.8898239  8.528273
2014 -1.9090176 14.124614
2020  1.1058956 16.223610

, , I(P_kgha^2)

           2.5 %       97.5 %
2008 -0.26652686 -0.070786589
2010 -0.06337602  0.026284517
2012 -0.05269627 -0.001643969
2014 -0.07776103  0.060682030
2020 -0.09230738  0.063690392

, , plotha

         2.5 %     97.5 %
2008 -154.6066  -82.06016
2010 -195.3888 -129.65170
2012 -148.8079  -99.65004
2014 -124.9147  -61.75162
2020 -154.1402  -92.29023

, , ncrops

          2.5 %      97.5 %
2008 -141.26380 -60.0011430
2010 -134.75088 -50.0336208
2012  -68.37502  -0.5034506
2014 -149.43100 -70.6309021
2020 -119.97452 -33.6775263

, , hhmem

         2.5 %   97.5 %
2008 -5.317004 21.90800
2010  2.863774 26.59428
2012 -3.454439 15.05995
2014 -2.397130 22.74151
2020 -3.890754 18.23722

, , femhead

         2.5 %     97.5 %
2008 -184.7008 -11.542566
2010 -165.7656   2.729873
2012 -105.8831  36.621917
2014 -220.1983 -40.326834
2020 -227.8198 -58.136437

, , headeduc

          2.5 %    97.5 %
2008  32.798234 169.81733
2010 -34.333477  94.47398
2012  -6.804295  86.64911
2014  13.746141 131.18898
2020   6.697638 115.50420

, , arain_tot

           2.5 %        97.5 %
2008 -0.19372271  0.0827338996
2010 -0.15793829  0.1063716456
2012 -0.02119268  0.1619312950
2014 -0.09340550  0.1065687267
2020 -0.15685137 -0.0009209911

, , population_density

            2.5 %     97.5 %
2008 -0.048741357 0.07799609
2010 -0.053257283 0.02231743
2012 -0.013951330 0.06056802
2014  0.004768678 0.17180090
2020 -0.004337163 0.07337121

, , wc2.1_30s_elev

            2.5 %    97.5 %
2008  0.261321021 0.4308216
2010  0.127063302 0.2871031
2012  0.128540308 0.2608559
2014 -0.007314814 0.1955694
2020  0.253770540 0.4591504

, , sand_0_5cm

         2.5 %      97.5 %
2008 -6.358532  1.45108019
2010 -9.093445 -1.55542534
2012 -6.112372  0.07347962
2014 -6.160724  3.28593856
2020 -6.193152  2.57096097

, , nitrogen_0_5cm

         2.5 %    97.5 %
2008 -111.5171 150.14956
2010 -105.7790 137.43083
2012 -250.7157 -47.17795
2014 -243.2346  21.66871
2020 -139.0081 149.14533

, , ECN_5_15cm

          2.5 %   97.5 %
2008  -6.340187 18.43929
2010  -7.234763 19.27378
2012 -10.058858 12.84087
2014  -1.234353 37.10635
2020  -1.341296 38.74533

, , pH_0_5cm

          2.5 %    97.5 %
2008 -25.930220 179.64082
2010  -7.826945 205.73058
2012 -90.898313  78.03458
2014 127.143891 353.91275
2020  37.703909 249.86233

, , N_kgha:soc_5_15cm

           2.5 %    97.5 %
2008 -0.34030468 0.1644410
2010 -0.29573329 0.2608710
2012 -0.28743337 0.1672861
2014  0.09170172 0.6369464
2020 -0.33619106 0.1295914
Code
plot(ci,cex=2, cex.lab=3, xlab="Maize yield (kg/ha) response", ylab="Year")

Code
baseline_quad_lmList_inter_soil_est <- coef(baseline_quad_lmList_inter_soil)
baseline_quad_lmList_inter_soil_est <-t(as.data.frame(baseline_quad_lmList_inter_soil_est))
order_tab <-rep(1,19)
order_tab <- as.data.frame(order_tab)
baseline_quad_lmList_inter_soil_est_dt=cbind(baseline_quad_lmList_inter_soil_est,order_tab)


baseline_quad_lmList_inter_soil_ci <- confint(baseline_quad_lmList_inter_soil)
baseline_quad_lmList_inter_soil_ci_dt <-t(as.data.frame(baseline_quad_lmList_inter_soil_ci))
order_tab <- rep(2:3,19)
order_tab <- as.data.frame(order_tab)

baseline_quad_lmList_inter_soil_ci_dt=cbind(baseline_quad_lmList_inter_soil_ci_dt,order_tab)

baseline_quad_lmList_inter_soil_est_ci_dt=bind_rows(baseline_quad_lmList_inter_soil_est_dt,baseline_quad_lmList_inter_soil_ci_dt)

write.csv(baseline_quad_lmList_inter_soil_est_ci_dt,"tables/baseline_quad_lmList_inter_soil_est_ci_dt.csv")


library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('baseline_quad_lmList_inter_soil_est_ci_dt', 'baseline_quad_lmList_inter_soil_est_ci_dt.csv')"
        ),
        reactable(
            baseline_quad_lmList_inter_soil_est_ci_dt,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "baseline_quad_lmList_inter_soil_est_ci_dt"
        )
    )
)
Code
## Visreg
library(visreg)

baseline_quad_inter_soils_2008 <- lm(yld_~N_kgha*soc_5_15cm+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot+population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm,data=tz_lsms_panel_2008)

baseline_quad_inter_soils_2010 <- lm(yld_~N_kgha*soc_5_15cm+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot+population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm,data=tz_lsms_panel_2010)

baseline_quad_inter_soils_2012 <- lm(yld_~N_kgha*soc_5_15cm+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot+population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm,data=tz_lsms_panel_2012)

baseline_quad_inter_soils_2014 <- lm(yld_~N_kgha*soc_5_15cm+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot+population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm,data=tz_lsms_panel_2014)

baseline_quad_inter_soils_2020 <- lm(yld_~N_kgha*soc_5_15cm+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot+population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm,data=tz_lsms_panel_2020)

visreg2d(baseline_quad_inter_soils_2008, "N_kgha", "soc_5_15cm",ylab="Soil carbon (g per kg)",xlab="Nitrogen (kg per ha)", main="2008")

Code
visreg2d(baseline_quad_inter_soils_2010, "N_kgha", "soc_5_15cm",ylab="Soil carbon (g per kg)",xlab="Nitrogen (kg per ha)", main="2010")

Code
visreg2d(baseline_quad_inter_soils_2012, "N_kgha", "soc_5_15cm",ylab="Soil carbon (g per kg)",xlab="Nitrogen (kg per ha)", main="2012")

Code
visreg2d(baseline_quad_inter_soils_2014, "N_kgha", "soc_5_15cm",ylab="Soil carbon (g per kg)",xlab="Nitrogen (kg per ha)", main="2014")

Code
visreg2d(baseline_quad_inter_soils_2020, "N_kgha", "soc_5_15cm",ylab="Soil carbon (g per kg)",xlab="Nitrogen (kg per ha)", main="2020")

Code
# Quadratic plateau or linear plateau
library(easynls)
tz_lsms_panel.temp <- tz_lsms_panel[, c("yld_","N_kgha")]
nls.QP <- nlsfit(tz_lsms_panel.temp, model = 4)
nls.QP$Model
[1] "y ~ (a + b * x + c * I(x^2)) * (x <= -0.5 * b/c) + (a + I(-b^2/(4 * c))) * (x > -0.5 * b/c)"
Code
mn=nls.QP$Parameters
mn
                                      N_kgha
coefficient a                  -3.909774e-01
coefficient b                   1.052253e-02
coefficient c                  -1.090000e-06
p-value t.test for a            2.841850e-01
p-value t.test for b            0.000000e+00
p-value t.test for c            0.000000e+00
r-squared                       8.650000e-02
adjusted r-squared              8.640000e-02
AIC                             8.165610e+04
BIC                             8.168471e+04
maximum or minimum value for y  2.499715e+01
critical point in x             4.825482e+03
Code
summary(mn)
     N_kgha        
 Min.   :   -0.39  
 1st Qu.:    0.00  
 Median :    0.09  
 Mean   :14015.95  
 3rd Qu.: 1225.12  
 Max.   :81684.71  

5.1.1 Site-year specific Quadratic Only response functions

The site-year specific Quadratic response function can be modeled as At level 1, we have \[ Y_{i} = a_{i} + b_{i}*N + c_{i}*N^2 + \varepsilon_{i} \] At level 2, we have \[ a_{i} \sim N(a_0, \sigma_{a}^2) \\ b_{i} \sim N(b_0, \sigma_{b}^2) \\ c_{i} \sim N(c_0, \sigma_{c}^2) \\ \] This model can be estimated with the linear mixed model function lmer in R package lme4

Code
tz_lsms_panel.temp <- tz_lsms_panel[, .(yld_, N_kgha, N2 = N_kgha^2, year)]
lmer.Q <- lmer(yld_ ~ 1 + N_kgha + N2  + (1 | year) + (0 + N_kgha | year) + (0 + N2 | year), data = tz_lsms_panel.temp)
lmer.Q
Linear mixed model fit by REML ['lmerMod']
Formula: yld_ ~ 1 + N_kgha + N2 + (1 | year) + (0 + N_kgha | year) + (0 +  
    N2 | year)
   Data: tz_lsms_panel.temp
REML criterion at convergence: 152692.7
Random effects:
 Groups   Name        Std.Dev. 
 year     (Intercept) 8.001e+01
 year.1   N_kgha      2.748e+00
 year.2   N2          6.472e-03
 Residual             7.960e+02
Number of obs: 9426, groups:  year, 5
Fixed Effects:
(Intercept)       N_kgha           N2  
  748.29010     11.24225      0.01232  
fit warnings:
Some predictor variables are on very different scales: consider rescaling
optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings 
Code
summary(lmer.Q)
Linear mixed model fit by REML ['lmerMod']
Formula: yld_ ~ 1 + N_kgha + N2 + (1 | year) + (0 + N_kgha | year) + (0 +  
    N2 | year)
   Data: tz_lsms_panel.temp

REML criterion at convergence: 152692.7

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.3333 -0.6213 -0.3108  0.2772  6.6397 

Random effects:
 Groups   Name        Variance  Std.Dev. 
 year     (Intercept) 6.402e+03 8.001e+01
 year.1   N_kgha      7.554e+00 2.748e+00
 year.2   N2          4.188e-05 6.472e-03
 Residual             6.337e+05 7.960e+02
Number of obs: 9426, groups:  year, 5

Fixed effects:
             Estimate Std. Error t value
(Intercept) 748.29010   36.88270  20.288
N_kgha       11.24225    1.75175   6.418
N2            0.01232    0.01525   0.808

Correlation of Fixed Effects:
       (Intr) N_kgha
N_kgha -0.051       
N2      0.046 -0.657
fit warnings:
Some predictor variables are on very different scales: consider rescaling
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')

Or, althernatively, can be estimated with the non-linear mixed model function nlme

Code
library(nlme)
nlme.Q <- nlme(yld_ ~ (a + b * N_kgha + c * (N_kgha^2)),
                    data = tz_lsms_panel,
                    fixed = a + b + c ~ 1,
                    random = a + b + c ~ 1,
                    groups = ~ year,
                    start = c(800, 10, -0.001))

nlme.Q
Nonlinear mixed-effects model fit by maximum likelihood
  Model: yld_ ~ (a + b * N_kgha + c * (N_kgha^2)) 
  Data: tz_lsms_panel 
  Log-likelihood: -76345.33
  Fixed: a + b + c ~ 1 
           a            b            c 
747.82829275  11.16211183   0.01323599 

Random effects:
 Formula: list(a ~ 1, b ~ 1, c ~ 1)
 Level: year
 Structure: General positive-definite, Log-Cholesky parametrization
         StdDev       Corr         
a         57.45976820 a      b     
b          1.64304646 -0.022       
c          0.02343941  0.946 -0.341
Residual 795.98608057              

Number of Observations: 9426
Number of Groups: 5 

5.1.2 Site-year specific Quadratic-plus-plateau response functions

The site-year specific response function can be modeled using a hierarchical quadratic-plus-plateau model. At level 1, we have \[ Y_{i} = \min(a_{i} + b_{i}*N + c_{i}*N^2, Y_{max}) + \varepsilon_{i} \] At level 2, we have \[ a_{i} \sim N(a_0, \sigma_{a}^2) \\ b_{i} \sim N(b_0, \sigma_{b}^2) \\ c_{i} \sim N(c_0, \sigma_{c}^2) \\ Y_{max} = a_{i} - b_{i}^2/(4*c_i) \]

It seems the R function nlme would work to estimate this model

Code
# Define quadratic-plus-plateau function
mq <- lm(yld_ ~ N_kgha + I(N_kgha^2), data=tz_lsms_panel)
a0 <- coef(mq)[[1]]
b0 <- coef(mq)[[2]]
c0 <- coef(mq)[[3]]
clx0 <- -0.5*b0/c0

# Test nls

# fx.QP <- function(N, a, b, c) {
#   y <- (a + b * N + c * I(N^2)) * (N <= -0.5 * b/c) + (a + I(-b^2/(4 * c))) * (N > -0.5 * b/c)
#   return(y)
# }
# 
# nls.QP <- nls(Y ~ fx.QP(N, a, b, c),
#             start = list(a = a0, b = b0, c = c0), data = dat.Puntel.CC.mean,
#             control = nls.control(maxiter = 6000))


# quadplat <- function(x, a, b, clx) {
#   ifelse(x  < clx, a + b * x   + (-0.5*b/clx) * x   * x, 
#                             a + b * clx + (-0.5*b/clx) * clx * clx)
# }
# 
# nls.QP <- nls(Y ~ quadplat(N, a, b, clx), 
#             start = list(a = a0, b = b0, clx = clx0), data = dat.Puntel.CC.mean, 
#             control = nls.control(maxiter = 6000))

# nlme.QP <- nlme(Y ~ fx.QP(N, a, b, c),
#                     data = dat.Puntel.CC.mean,
#                     fixed = a + b + c ~ 1,
#                     random = a + b + c ~ 1,
#                     groups = ~ year,
#                     start = c(a0, b0, c0))
# 
# nlme.QP

# tz_lsms_panel.nlme <- nlme(yld_ ~ (a + b * N_kgha  + c * I(N_kgha  ^2)) * (N_kgha  <= (-0.5 * b/c)) + (a- I(b^2/(4 * c))) * (N_kgha  > (-0.5 * b/c)),
#                     data = tz_lsms_panel,
#                     fixed = a + b + c ~ 1,
#                     random = a + b + c ~ 1,
#                     groups = ~ year,
#                     start = c(a0, b0, c0))
# tz_lsms_panel.nlme

5.1.3 Bayesian analysis

Code
# library(brms)
# 
# tz_lsms_panel$Nsq_kgha=tz_lsms_panel$N_kgha^2
# tz_lsms_panel$Y=tz_lsms_panel$yld_
# 
# f1 <- Y ~ (a+ b*N_kgha+c*(Nsq_kgha))*(N_kgha<=(-0.5*b/c))+(a-(b*b/(4*c)))*(N_kgha>(-0.5*b/c))
# 
# prior_1=c(set_prior("normal(5,1)", nlpar="a"),
# set_prior("normal(0,1)", nlpar="b"),
# set_prior("normal(0,1)", nlpar="c"))
# 
# form=bf(f1,nl=TRUE)+list(a~1|year,b~1|year,c~1|year)
# 
# bayesQP=brm(form,prior=prior_1,data=tz_lsms_panel)
# 
# summary(bayesQP)            

5.2 Nonlinear parameter models: geoadditive model

Code
set.seed(321)

library(bamlss)

# Linear
f1 <- yld_~N_kgha + plotha + P_kgha + ncrops + hhmem + femhead + headeduc + arain_tot +population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm+ region + year

b1 <- bamlss(f1, data = tz_lsms_panel, family = "gaussian", n.iter = 12000, burnin = 2000, thin = 10)
AICc 147908.5 logPost -74304.0 logLik -73902.9 edf 51.000 eps 0.2856 iteration   1
AICc 147906.0 logPost -74302.7 logLik -73901.7 edf 51.000 eps 0.0008 iteration   2
AICc 147906.0 logPost -74302.7 logLik -73901.7 edf 51.000 eps 0.0000 iteration   3
AICc 147906.0 logPost -74302.7 logLik -73901.7 edf 51.000 eps 0.0000 iteration   3
elapsed time:  0.05sec
Starting the sampler...

|                    |   0%  7.59min
|*                   |   5%  7.34min 23.17sec
|**                  |  10%  7.09min 47.27sec
|***                 |  15%  7.12min  1.26min
|****                |  20%  6.54min  1.64min
|*****               |  25%  6.06min  2.02min
|******              |  30%  5.60min  2.40min
|*******             |  35%  5.26min  2.83min
|********            |  40%  4.86min  3.24min
|*********           |  45%  4.44min  3.64min
|**********          |  50%  4.01min  4.01min
|***********         |  55%  3.60min  4.39min
|************        |  60%  3.23min  4.84min
|*************       |  65%  2.84min  5.27min
|**************      |  70%  2.42min  5.64min
|***************     |  75%  2.00min  6.01min
|****************    |  80%  1.60min  6.39min
|*****************   |  85%  1.21min  6.84min
|******************  |  90% 48.09sec  7.21min
|******************* |  95% 24.00sec  7.60min
|********************| 100%  0.00sec  8.01min
Code
b1 <- gam(f1, data = tz_lsms_panel, family = "gaussian", n.iter = 12000, burnin = 2000, thin = 10)

summary(b1)

Family: gaussian 
Link function: identity 

Formula:
yld_ ~ N_kgha + plotha + P_kgha + ncrops + hhmem + femhead + 
    headeduc + arain_tot + population_density + wc2.1_30s_elev + 
    sand_0_5cm + nitrogen_0_5cm + soc_5_15cm + ECN_5_15cm + pH_0_5cm + 
    region + year

Parametric coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         718.97922  246.52773   2.916 0.003549 ** 
N_kgha                8.52450    0.47716  17.865  < 2e-16 ***
plotha             -135.62605    7.01133 -19.344  < 2e-16 ***
P_kgha                5.11078    0.75059   6.809 1.04e-11 ***
ncrops              -59.15740    9.10338  -6.498 8.54e-11 ***
hhmem                15.08730    2.67438   5.641 1.74e-08 ***
femhead             -83.89112   18.78015  -4.467 8.03e-06 ***
headeduc             62.89908   12.80166   4.913 9.11e-07 ***
arain_tot            -0.01477    0.02349  -0.629 0.529531    
population_density    0.01722    0.01153   1.493 0.135395    
wc2.1_30s_elev        0.21164    0.03285   6.443 1.23e-10 ***
sand_0_5cm           -4.71812    1.03684  -4.550 5.42e-06 ***
nitrogen_0_5cm     -115.23103   36.29099  -3.175 0.001502 ** 
soc_5_15cm            0.14188    3.30973   0.043 0.965807    
ECN_5_15cm           -0.34199    3.51547  -0.097 0.922506    
pH_0_5cm             31.92882   30.71503   1.040 0.298591    
region2             238.40378   65.78418   3.624 0.000292 ***
region3             217.00663   63.07191   3.441 0.000583 ***
region4             411.06674   67.97759   6.047 1.53e-09 ***
region5             204.02754   60.34312   3.381 0.000725 ***
region6             -21.62924   91.09097  -0.237 0.812316    
region7             197.03835  138.54958   1.422 0.155017    
region8             122.31516   63.12055   1.938 0.052678 .  
region9              58.03671   57.40579   1.011 0.312048    
region10            144.94753   53.77690   2.695 0.007044 ** 
region11            135.09183   51.40868   2.628 0.008608 ** 
region12            285.78864   48.78499   5.858 4.84e-09 ***
region13             84.14019   61.98880   1.357 0.174705    
region14             14.84556   49.67560   0.299 0.765061    
region15            468.98856   57.42106   8.168 3.56e-16 ***
region16            -60.14323   61.60002  -0.976 0.328916    
region17            -42.91851   51.75435  -0.829 0.406971    
region18           -116.12333   65.88245  -1.763 0.078004 .  
region19            -94.79948   57.68443  -1.643 0.100331    
region20             11.91775   73.11425   0.163 0.870521    
region21            366.45333   64.80407   5.655 1.61e-08 ***
region22            -53.62290   89.50695  -0.599 0.549126    
region23            551.70143   90.79113   6.077 1.28e-09 ***
region24             47.37706   65.27068   0.726 0.467946    
region25            184.00348   88.13499   2.088 0.036848 *  
region26            290.27847  170.02019   1.707 0.087797 .  
region51            346.61470  238.87824   1.451 0.146811    
region52            128.51685  311.49873   0.413 0.679927    
region53           1300.92519  528.81411   2.460 0.013909 *  
region54            -34.22778  744.86225  -0.046 0.963350    
region55           -230.65619  287.16808  -0.803 0.421874    
year2010             48.15114   25.91661   1.858 0.063212 .  
year2012             15.96350   25.02859   0.638 0.523613    
year2014            207.38753   27.91754   7.429 1.20e-13 ***
year2020            170.26837   29.89931   5.695 1.27e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


R-sq.(adj) =  0.197   Deviance explained = 20.1%
GCV = 5.5385e+05  Scale est. = 5.5084e+05  n = 9208
Code
library(visreg)
visreg(b1,"N_kgha")

Code
# Nonlinear
# f2=yld_~s(N_kgha)+s(plotha)+s(P_kgha)+s(ncrops)+s(hhmem)+femhead+headeduc+s(arain_tot)+ti(region)+ti(year)
#
# b2=bamlss(f2,data=tz_lsms_panel,family="gaussian", n.iter=12000, burnin=2000, thin=10)
#
#
# summary(b2)
# plot(b2, ask=FALSE)
# #
library(bamlss)
f3 <- list(
    yld_ ~ s(N_kgha, by = year) + plotha + P_kgha + ncrops + hhmem + femhead + headeduc + arain_tot +population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm,

    sigma ~ s(N_kgha, by = year) + plotha + P_kgha + ncrops + hhmem + femhead + headeduc + arain_tot+population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm
)

b3 <- bamlss(f3, data = tz_lsms_panel)
AICc 147510.5 logPost -74164.3 logLik -73700.7 edf 54.181 eps 0.3607 iteration   1
AICc 147076.9 logPost -73984.7 logLik -73480.2 edf 57.867 eps 0.0593 iteration   2
AICc 147025.5 logPost -73984.8 logLik -73450.9 edf 61.365 eps 0.0201 iteration   3
AICc 147005.0 logPost -73995.9 logLik -73438.3 edf 63.759 eps 0.0067 iteration   4
AICc 146988.8 logPost -74009.3 logLik -73426.1 edf 67.782 eps 0.0084 iteration   5
AICc 146981.5 logPost -74022.0 logLik -73419.7 edf 70.491 eps 0.0034 iteration   6
AICc 146980.6 logPost -74025.5 logLik -73418.4 edf 71.329 eps 0.0014 iteration   7
AICc 146980.5 logPost -74025.6 logLik -73418.3 edf 71.385 eps 0.0005 iteration   8
AICc 146980.4 logPost -74025.7 logLik -73418.2 edf 71.392 eps 0.0003 iteration   9
AICc 146980.4 logPost -74025.6 logLik -73418.2 edf 71.392 eps 0.0001 iteration  10
AICc 146980.4 logPost -74025.6 logLik -73418.2 edf 71.392 eps 0.0000 iteration  11
AICc 146980.4 logPost -74025.6 logLik -73418.2 edf 71.392 eps 0.0000 iteration  11
elapsed time:  7.84sec
Starting the sampler...

|                    |   0%  1.09min
|*                   |   5%  1.10min  3.47sec
|**                  |  10%  1.09min  7.28sec
|***                 |  15%  1.07min 11.32sec
|****                |  20%  1.11min 16.61sec
|*****               |  25%  1.10min 22.00sec
|******              |  30%  1.07min 27.63sec
|*******             |  35%  1.03min 33.36sec
|********            |  40% 58.79sec 39.19sec
|*********           |  45% 53.67sec 43.91sec
|**********          |  50% 48.58sec 48.58sec
|***********         |  55% 43.36sec 53.00sec
|************        |  60% 38.43sec 57.64sec
|*************       |  65% 33.34sec  1.03min
|**************      |  70% 28.33sec  1.10min
|***************     |  75% 23.43sec  1.17min
|****************    |  80% 18.64sec  1.24min
|*****************   |  85% 13.90sec  1.31min
|******************  |  90%  9.21sec  1.38min
|******************* |  95%  4.59sec  1.45min
|********************| 100%  0.00sec  1.52min
Code
summary(b3)

Call:
bamlss(formula = f3, data = tz_lsms_panel)
---
Family: gaussian 
Link function: mu = identity, sigma = log
*---
Formula mu:
---
yld_ ~ s(N_kgha, by = year) + plotha + P_kgha + ncrops + hhmem + 
    femhead + headeduc + arain_tot + population_density + wc2.1_30s_elev + 
    sand_0_5cm + nitrogen_0_5cm + soc_5_15cm + ECN_5_15cm + pH_0_5cm
-
Parametric coefficients:
                         Mean       2.5%        50%      97.5% parameters
(Intercept)         1.306e+02 -2.216e+02  1.314e+02  4.708e+02    123.318
plotha             -6.811e+01 -7.656e+01 -6.797e+01 -6.020e+01    -68.100
P_kgha              7.646e+00  5.150e+00  7.668e+00  1.005e+01      7.362
ncrops             -5.075e+01 -6.237e+01 -5.095e+01 -3.848e+01    -50.101
hhmem               6.663e+00  2.481e+00  6.703e+00  1.104e+01      6.827
femhead            -6.655e+01 -9.583e+01 -6.626e+01 -3.417e+01    -65.173
headeduc            6.431e+01  3.902e+01  6.456e+01  8.957e+01     66.178
arain_tot           2.440e-02 -1.025e-02  2.372e-02  6.130e-02      0.026
population_density  3.129e-02  3.099e-03  2.967e-02  6.872e-02      0.027
wc2.1_30s_elev      2.087e-01  1.753e-01  2.079e-01  2.416e-01      0.209
sand_0_5cm         -2.011e+00 -3.488e+00 -2.014e+00 -3.951e-01     -1.980
nitrogen_0_5cm     -2.361e+01 -7.042e+01 -2.411e+01  2.053e+01    -22.706
soc_5_15cm          3.764e+00 -9.591e-01  3.796e+00  8.454e+00      3.751
ECN_5_15cm          1.093e+01  3.716e+00  1.082e+01  1.869e+01     11.088
pH_0_5cm            9.797e+01  5.453e+01  9.717e+01  1.417e+02     97.893
-
Acceptance probability:
        Mean   2.5%    50% 97.5%
alpha 0.9860 0.9079 0.9994     1
-
Smooth terms:
                                  Mean      2.5%       50%     97.5% parameters
s(N_kgha,by=year):2008.tau21 4.154e+05 2.586e-01 1.437e+02 1.305e+06  1.545e+00
s(N_kgha,by=year):2008.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2008.edf   1.276e+00 9.788e-01 1.003e+00 3.212e+00  9.970e-01
s(N_kgha,by=year):2010.tau21 2.172e+06 7.961e+04 1.078e+06 1.023e+07  1.070e+06
s(N_kgha,by=year):2010.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2010.edf   3.774e+00 2.026e+00 3.676e+00 5.895e+00  3.722e+00
s(N_kgha,by=year):2012.tau21 1.675e+05 2.208e+01 1.164e+04 1.090e+06  2.920e-01
s(N_kgha,by=year):2012.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2012.edf   1.493e+00 1.000e+00 1.217e+00 3.476e+00  9.860e-01
s(N_kgha,by=year):2014.tau21 5.093e+06 3.200e+05 3.048e+06 2.105e+07  4.657e+06
s(N_kgha,by=year):2014.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2014.edf   4.188e+00 2.472e+00 4.147e+00 6.114e+00  4.599e+00
s(N_kgha,by=year):2020.tau21 2.628e+06 3.348e-01 9.525e+05 1.745e+07  2.488e+06
s(N_kgha,by=year):2020.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2020.edf   3.025e+00 9.842e-01 3.195e+00 5.860e+00  3.982e+00
---
Formula sigma:
---
sigma ~ s(N_kgha, by = year) + plotha + P_kgha + ncrops + hhmem + 
    femhead + headeduc + arain_tot + population_density + wc2.1_30s_elev + 
    sand_0_5cm + nitrogen_0_5cm + soc_5_15cm + ECN_5_15cm + pH_0_5cm
-
Parametric coefficients:
                         Mean       2.5%        50%      97.5% parameters
(Intercept)         5.717e+00  5.372e+00  5.720e+00  6.080e+00      5.723
plotha             -1.196e-01 -1.295e-01 -1.200e-01 -1.085e-01     -0.120
P_kgha              3.320e-03  1.577e-03  3.306e-03  5.029e-03      0.003
ncrops             -7.837e-02 -9.451e-02 -7.802e-02 -6.342e-02     -0.079
hhmem               7.382e-03  2.670e-03  7.470e-03  1.245e-02      0.007
femhead            -8.498e-02 -1.228e-01 -8.486e-02 -4.878e-02     -0.087
headeduc            1.187e-01  9.719e-02  1.186e-01  1.402e-01      0.120
arain_tot          -7.409e-06 -4.941e-05 -8.141e-06  3.337e-05      0.000
population_density  4.376e-05  1.671e-05  4.306e-05  7.286e-05      0.000
wc2.1_30s_elev      2.208e-04  1.870e-04  2.211e-04  2.574e-04      0.000
sand_0_5cm         -1.598e-03 -3.244e-03 -1.605e-03  1.458e-04     -0.002
nitrogen_0_5cm      6.620e-02  1.655e-02  6.616e-02  1.172e-01      0.068
soc_5_15cm         -4.202e-03 -9.697e-03 -3.902e-03  7.907e-04     -0.004
ECN_5_15cm          1.373e-02  6.786e-03  1.379e-02  2.149e-02      0.014
pH_0_5cm            1.371e-01  9.640e-02  1.380e-01  1.789e-01      0.137
-
Acceptance probability:
           Mean      2.5%       50% 97.5%
alpha 0.5250304 0.0003718 0.5129290     1
-
Smooth terms:
                                  Mean      2.5%       50%     97.5% parameters
s(N_kgha,by=year):2008.tau21 1.430e+01 3.628e-01 7.719e+00 7.216e+01     35.875
s(N_kgha,by=year):2008.alpha 7.969e-01 2.273e-01 9.106e-01 1.000e+00         NA
s(N_kgha,by=year):2008.edf   4.930e+00 2.675e+00 5.006e+00 7.216e+00      6.540
s(N_kgha,by=year):2010.tau21 1.512e+01 2.636e+00 1.133e+01 5.149e+01     12.185
s(N_kgha,by=year):2010.alpha 7.954e-01 1.618e-01 9.062e-01 1.000e+00         NA
s(N_kgha,by=year):2010.edf   5.950e+00 4.412e+00 5.936e+00 7.532e+00      6.019
s(N_kgha,by=year):2012.tau21 4.357e-01 9.376e-05 3.787e-02 3.966e+00      0.913
s(N_kgha,by=year):2012.alpha 9.205e-01 4.846e-01 9.908e-01 1.000e+00         NA
s(N_kgha,by=year):2012.edf   2.034e+00 1.003e+00 1.697e+00 4.857e+00      3.533
s(N_kgha,by=year):2014.tau21 4.887e+00 4.419e-01 2.869e+00 2.319e+01      4.782
s(N_kgha,by=year):2014.alpha 7.284e-01 5.818e-02 8.178e-01 1.000e+00         NA
s(N_kgha,by=year):2014.edf   4.330e+00 2.796e+00 4.219e+00 6.418e+00      4.710
s(N_kgha,by=year):2020.tau21 7.864e+00 5.636e-01 4.674e+00 3.574e+01     15.593
s(N_kgha,by=year):2020.alpha 7.815e-01 1.840e-01 8.803e-01 1.000e+00         NA
s(N_kgha,by=year):2020.edf   5.028e+00 3.118e+00 4.987e+00 7.193e+00      6.305
---
Sampler summary:
-
DIC = 146996.9 logLik = -73462.92 pd = 71.0341
runtime = 92.31
---
Optimizer summary:
-
AICc = 146980.5 edf = 71.3927 logLik = -73418.29
logPost = -74025.7 nobs = 9208 runtime = 7.84
Code
# plot(b3,ask=FALSE)

plot(b3, cex = 2.5, model = "mu", term = "s(N_kgha,by=year)")

Code
plot(b3, cex = 2.5, model = "sigma", term = "s(N_kgha,by=year")

Code
# GAM model
f_gam <-  yld_ ~ s(N_kgha, by = year,sp=0.1) + plotha + P_kgha + ncrops + hhmem + femhead + headeduc + arain_tot +population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm
    
b_gam <- gam(f_gam,data = tz_lsms_panel)

previous_theme <- theme_set(theme_bw())
visreg_polynomial_plot=visreg(b_gam,"N_kgha","year",gg=TRUE,xlab="Applied nitrogen (kg per ha)", ylab="Maize yield (kg per ha)",partial=FALSE,rug=FALSE)
visreg_polynomial_plot

Code
ggsave("figures/visreg_polynomial_plot.png",width=4.16,height=3.16)

dev.off()
null device 
          1 
Code
visreg2d(b_gam,"year", "N_kgha", plot.type="image")

6 Causal RF approach

6.1 Binary treatment

Code
library(grf)
library(policytree)

tz_lsms_panel_estim_fert <- subset(tz_lsms_panel, select = c("fert1_bin","yld_", "N_kgha", "plotha", "P_kgha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year", "lat_modified", "lon_modified"))

tz_lsms_panel_estim_fert$region <- as.numeric(tz_lsms_panel_estim_fert$region)
tz_lsms_panel_estim_fert$year <- as.numeric(tz_lsms_panel_estim_fert$year)

library(tidyr)
tz_lsms_panel_estim_fert <- tz_lsms_panel_estim_fert %>% drop_na()


Y_cf_fert <- as.vector(tz_lsms_panel_estim_fert$yld_)
## Causal random forest -----------------

X_cf_fert <- subset(tz_lsms_panel_estim_fert, select = c("plotha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year"))

X_firststage_cf_fert <- subset(tz_lsms_panel_estim_fert, select = c("plotha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year"))


W_cf_fert_binary <- as.vector(tz_lsms_panel_estim_fert$fert1_bin)

# Probability random forest to create weights
W.multi_fert.forest_binary <- regression_forest(X_cf_fert, W_cf_fert_binary,
    equalize.cluster.weights = FALSE,
    seed = 2
)
W.hat.multi.all_fert_binary <- predict(W.multi_fert.forest_binary, estimate.variance = TRUE)$predictions


# Regression forest to get expected responses
Y.multi_fert.forest_binary <- regression_forest(X_cf_fert, Y_cf_fert,
    equalize.cluster.weights = FALSE,
    seed = 2
)

print(Y.multi_fert.forest_binary)
GRF forest object of type regression_forest 
Number of trees: 2000 
Number of training samples: 9208 
Variable importance: 
    1     2     3     4     5     6     7     8     9    10    11    12    13 
0.706 0.013 0.003 0.001 0.004 0.005 0.005 0.212 0.007 0.006 0.020 0.003 0.007 
   14    15 
0.005 0.003 
Code
varimp.multi_fert_binary <- variable_importance(Y.multi_fert.forest_binary)
Y.hat.multi.all_fert_binary <- predict(Y.multi_fert.forest_binary, estimate.variance = TRUE)$predictions

# Fit binary causal RF model
multi_fert.forest_binary <- causal_forest(X = X_cf_fert, Y = Y_cf_fert, W = W_cf_fert_binary, W.hat = W.hat.multi.all_fert_binary, Y.hat = Y.hat.multi.all_fert_binary, seed = 2)

varimp.multi_fert_cf_binary <- variable_importance(multi_fert.forest_binary)

# Average treatment effects
multi_fert_ate_binary <- average_treatment_effect(multi_fert.forest_binary, target.sample = "overlap")
multi_fert_ate_binary
 estimate   std.err 
296.77835  30.26994 
Code
multi_fert_binary_calibration <- test_calibration(multi_fert.forest_binary)
multi_fert_binary_calibration

Best linear fit using forest predictions (on held-out data)
as well as the mean forest prediction as regressors, along
with one-sided heteroskedasticity-robust (HC3) SEs:

                               Estimate Std. Error t value    Pr(>t)    
mean.forest.prediction          0.99724    0.10227  9.7509 < 2.2e-16 ***
differential.forest.prediction  1.25318    0.26830  4.6708 1.521e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.2 Continuous treatment

Code
library(grf)
library(policytree)

tz_lsms_panel_estim_fert <- subset(tz_lsms_panel, select = c("fert1_bin","yld_", "N_kgha", "plotha", "P_kgha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year", "lat_modified", "lon_modified","maipkg_usd","Npkg_usd"))

tz_lsms_panel_estim_fert$region <- as.numeric(tz_lsms_panel_estim_fert$region)
tz_lsms_panel_estim_fert$year <- as.numeric(tz_lsms_panel_estim_fert$year)

library(tidyr)
tz_lsms_panel_estim_fert <- tz_lsms_panel_estim_fert %>% drop_na()


Y_cf_fert <- as.vector(tz_lsms_panel_estim_fert$yld_)
## Causal random forest -----------------

X_cf_fert <- subset(tz_lsms_panel_estim_fert, select = c("plotha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year"))

X_firststage_cf_fert <- subset(tz_lsms_panel_estim_fert, select = c("plotha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year"))


W_cf_fert_continuous <- as.vector(tz_lsms_panel_estim_fert$N_kgha)

# Probability random forest to create weights
W.multi_fert.forest_continuous <- regression_forest(X_cf_fert, W_cf_fert_continuous,
    equalize.cluster.weights = FALSE,
    seed = 2
)
W.hat.multi.all_fert_continuous <- predict(W.multi_fert.forest_continuous, estimate.variance = TRUE)$predictions


# Regression forest to get expected responses
Y.multi_fert.forest_continuous <- regression_forest(X_cf_fert, Y_cf_fert,
    equalize.cluster.weights = FALSE,
    seed = 2
)

print(Y.multi_fert.forest_continuous)
GRF forest object of type regression_forest 
Number of trees: 2000 
Number of training samples: 9208 
Variable importance: 
    1     2     3     4     5     6     7     8     9    10    11    12    13 
0.706 0.013 0.003 0.001 0.004 0.005 0.005 0.212 0.007 0.006 0.020 0.003 0.007 
   14    15 
0.005 0.003 
Code
varimp.multi_fert_continuous <- variable_importance(Y.multi_fert.forest_continuous)
Y.hat.multi.all_fert_continuous <- predict(Y.multi_fert.forest_continuous, estimate.variance = TRUE)$predictions

# Fit continuous causal RF model
multi_fert.forest_continuous <- causal_forest(X = X_cf_fert, Y = Y_cf_fert, W = W_cf_fert_continuous, W.hat = W.hat.multi.all_fert_continuous, Y.hat = Y.hat.multi.all_fert_continuous, seed = 2)

varimp.multi_fert_cf_continuous <- variable_importance(multi_fert.forest_continuous)

# Average treatment effects
multi_fert_ate_continuous <- average_treatment_effect(multi_fert.forest_continuous, target.sample = "overlap")
multi_fert_ate_continuous
estimate  std.err 
9.921472 0.711926 
Code
multi_fert_continuous_calibration <- test_calibration(multi_fert.forest_continuous)
multi_fert_continuous_calibration

Best linear fit using forest predictions (on held-out data)
as well as the mean forest prediction as regressors, along
with one-sided heteroskedasticity-robust (HC3) SEs:

                               Estimate Std. Error t value    Pr(>t)    
mean.forest.prediction          0.99353    0.06987 14.2196 < 2.2e-16 ***
differential.forest.prediction  1.23712    0.26364  4.6925 1.369e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
library(ggridges)
library(dplyr)
library(ggplot2)

tau.multi_fert.forest_continuous <- predict(multi_fert.forest_continuous, target.sample = "all", estimate.variance = TRUE)

tau.multi_fert.forest_continuous <- as.data.frame(tau.multi_fert.forest_continuous)

tau.multi_fert.forest_X <- data.frame(tz_lsms_panel_estim_fert, tau.multi_fert.forest_continuous)

NUE_Density=ggplot(tau.multi_fert.forest_X, aes(x = predictions, y = "", fill = factor(stat(quantile)))) +
    stat_density_ridges(
        geom = "density_ridges_gradient", calc_ecdf = TRUE,
        quantiles = 4, quantile_lines = TRUE
    ) +
    scale_y_discrete(expand = c(0.01, 0)) +
    scale_fill_viridis_d(name = "Quartiles") +
    expand_limits(y = 1) +
    theme_bw(base_size = 16) +
    labs(x = "N use effect", y = "Density")
NUE_Density

Code
ggsave("figures/NUE_Density.png", dpi=300, width=5.16,height=4.16)

7 Exploring heterogeneity in Conditional N Use Efficiencies from CRF

7.1 Causal RF

Code
library(ggplot2)
NperHa_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$N_kgha > 0), aes(N_kgha, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Applied N per ha", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
NperHa_CATE_N_plot

Code
Plotsize_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$plotha > 0), aes(plotha, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Plot size (ha)", y = "N use efficiency")
previous_theme <- theme_set(theme_bw(base_size = 16))
Plotsize_CATE_N_plot

Code
P_CATE_N_plot <- ggplot(tau.multi_fert.forest_X, aes(P_kgha, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "P", y = "N use efficiency")
previous_theme <- theme_set(theme_bw(base_size = 16))
P_CATE_N_plot

Code
Hhmem_CATE_N_plot <- ggplot(tau.multi_fert.forest_X, aes(hhmem, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "HHMEM", y = "N use efficiency")
previous_theme <- theme_set(theme_bw(base_size = 16))
Hhmem_CATE_N_plot

Code
# By year
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==1]=2008
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==2]=2010
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==3]=2012
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==4]=2014
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==5]=2020

tau.multi_fert.forest_X$year_det=as.numeric(tau.multi_fert.forest_X$year_det)

NperHa_CATE_N_plot_yr <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$N_kgha > 0), aes(N_kgha, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    lims(x = c(0, 100)) +
    labs(x = "Applied N per ha", y = "N use efficiency") +
    theme_bw(base_size = 16) +
    facet_wrap(~year_det)

NperHa_CATE_N_plot_yr

Code
# By soil organic carbon

library(ggplot2)
soc_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$soc_5_15cm > 0), aes(soc_5_15cm, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Soil organic carbon (%)", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
soc_CATE_N_plot

Code
ggsave("figures/soc_CATE_N_plot.png", dpi=300, width=4.16,height=4.16)


# By soil sand
library(ggplot2)
sand_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$sand_0_5cm > 0), aes(sand_0_5cm, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Sand (%)", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
sand_CATE_N_plot

Code
ggsave("figures/sand_CATE_N_plot.png", dpi=300, width=4.16,height=4.16)

# Electrical conductivity
library(ggplot2)
soil_elec_cond_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$ECN_5_15cm > 0), aes(ECN_5_15cm, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Soil electrical conductivity", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
soil_elec_cond_CATE_N_plot

Code
ggsave("figures/soil_elec_cond_CATE_N_plot.png", dpi=300, width=4.16,height=4.16)

# By density
library(ggplot2)
pop_density_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$population_density > 0), aes(population_density, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Population density", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
pop_density_CATE_N_plot

Code
ggsave("figures/pop_density_CATE_N_plot.png", dpi=300, width=4.16,height=4.16)

# By elevation

library(ggplot2)
elev_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$wc2.1_30s_elev > 0), aes(wc2.1_30s_elev, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Elevation (masl)", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
elev_CATE_N_plot

Code
ggsave("figures/elev_CATE_N_plot.png", dpi=300, width=4.16,height=4.16)

# By rainfall


library(ggplot2)
rainfall_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$arain_tot > 0), aes(arain_tot, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Rainfall (mm)", y = "N use efficiency")+
    facet_wrap(~year_det)


previous_theme <- theme_set(theme_bw(base_size = 16))
rainfall_CATE_N_plot

8 Are N use efficiencies falling overtime?

8.1 Causal RF

Code
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==1]=2008
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==2]=2010
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==3]=2012
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==4]=2014
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==5]=2020

tau.multi_fert.forest_X$year_det=as.numeric(tau.multi_fert.forest_X$year_det)

Year_CATE_N_plot=ggplot(tau.multi_fert.forest_X,aes(year_det,predictions))+
  geom_smooth(method="loess",formula=y~x,col="darkblue")+
  labs(x="Year",y="N use efficiency")+
  scale_x_continuous(breaks = c(2008, 2010, 2012,2014,2020))
previous_theme <- theme_set(theme_bw())
Year_CATE_N_plot

Code
ggsave("figures/Year_CATE_N_plot.png", dpi=300, width=5.16,height=4.16)

library(ggpubr) 
library(tidyverse) 

#Sowing dates 

Year_CATE_N_Errorplot=
  tau.multi_fert.forest_X %>% 
  drop_na(year_det) %>%
  ggerrorplot(x = "year_det", y = "predictions",add = "mean", error.plot = "errorbar", color="black",size=1, ggtheme=theme_bw())+
  labs(x="Year",y="N use efficiency")+
  theme_bw(base_size = 16)
Year_CATE_N_Errorplot

Code
ggsave("figures/Year_CATE_N_Errorplot.png", dpi=300, width=5.16,height=4.16)

9 Double machine learning (DML)

10 Mapping the estimates

Code
library(ggridges)
library(dplyr)
library(ggplot2)

tau.multi_fert.forest_dummy <- predict(multi_fert.forest_binary, target.sample = "all", estimate.variance = TRUE)

tau.multi_fert.forest_dummy <- as.data.frame(tau.multi_fert.forest_dummy)

tau.multi_fert.forest_X_dummy <- data.frame(tz_lsms_panel_estim_fert, tau.multi_fert.forest_dummy)

ggplot(tau.multi_fert.forest_X_dummy, aes(x = predictions, y = "", fill = factor(stat(quantile)))) +
    stat_density_ridges(
        geom = "density_ridges_gradient", calc_ecdf = TRUE,
        quantiles = 4, quantile_lines = TRUE
    ) +
    scale_y_discrete(expand = c(0.01, 0)) +
    scale_fill_viridis_d(name = "Quartiles") +
    expand_limits(y = 1) +
    theme_bw(base_size = 16) +
    labs(x = "Inorgatic fert use effect", y = "Density")

Code
#
library(sp)
library(sf)
tau.multi_fert.forest_X_dummy_sp <- SpatialPointsDataFrame(cbind(tau.multi_fert.forest_X_dummy$lon_modified, tau.multi_fert.forest_X_dummy$lat_modified), data = tau.multi_fert.forest_X_dummy, proj4string = CRS("+proj=longlat +datum=WGS84"))


library(tmap)
tmap_mode("plot")
Nuse_effect_map <- tm_shape(tau.multi_fert.forest_X_dummy_sp) +
    tm_dots(col = "predictions", title = "Effect of N use on yield (kg/ha)", style = "quantile") +
    tm_layout(legend.outside = TRUE)
Nuse_effect_map

Code
tmap_save(Nuse_effect_map, "figures/Nuse_effect_map.png", width = 600, height = 600, asp = 0)


# N Use efficiency map
library(sp)
library(sf)
tau.multi_fert.forest_X_sp <- SpatialPointsDataFrame(cbind(tau.multi_fert.forest_X$lon_modified, tau.multi_fert.forest_X$lat_modified), data = tau.multi_fert.forest_X, proj4string = CRS("+proj=longlat +datum=WGS84"))


library(tmap)
tmap_mode("plot")
Nuse_efficiency_map <- tm_shape(tau.multi_fert.forest_X_sp) +
    tm_dots(col = "predictions", title = "NUE (kg Maize/Kg N)", style = "quantile") +
    tm_layout(legend.outside = TRUE)
Nuse_efficiency_map

Code
tmap_save(Nuse_efficiency_map, "figures/Nuse_efficiency_map.png", width = 600, height = 600, asp = 0)

# N Use  map
library(sp)
library(sf)

tau.multi_fert.forest_X_sp_small <- subset(tau.multi_fert.forest_X_sp, tau.multi_fert.forest_X_sp$N_kgha > 0)

library(tmap)
tmap_mode("plot")
Nuse_map <- tm_shape(tau.multi_fert.forest_X_sp_small) +
    tm_dots(col = "N_kgha", title = "N applied (Kg/ha)", style = "quantile") +
    tm_layout(legend.outside = TRUE)
Nuse_map

Code
tmap_save(Nuse_map, "figures/Nuse_map.png", width = 600, height = 600, asp = 0)

10.1 Map of N Use Efficiencies overtime

Code
library(tmap)
tmap_mode("plot")
Nuse_efficiency_map_yr <- tm_shape(tau.multi_fert.forest_X_sp) +
    tm_dots(col = "predictions", title = "NUE (kg Maize/Kg N)", style = "quantile") +
    tm_layout(legend.outside = TRUE) +
    tm_facets("year")
Nuse_efficiency_map_yr

Code
tmap_save(Nuse_efficiency_map_yr, "figures/Nuse_efficiency_map_yr.png", width = 600, height = 600, asp = 0)

11 Mapping Descriptive Statistics

Code
library(geodata)

TZ <- gadm(country = "TZ", level = 1, path = tempdir())
plot(TZ)

Code
TZ_sf= st_as_sf(TZ)
TZ_sp=as_Spatial(TZ_sf)


library(tmap)

tm_shape(TZ_sp) + 
  tm_polygons(col="grey",legend.show = F,border.col="white")+
  tmap_options(max.categories = 31)+
  tm_text("NAME_1", size = 0.7)

Code
tau.multi_fert.forest_X_sp$region_name <- over(tau.multi_fert.forest_X_sp, TZ_sp[,"NAME_1"])



# Summary by region and by year
library(modelsummary)

tau.multi_fert.forest_X_sp_dt <-tau.multi_fert.forest_X_sp@data

tau.multi_fert.forest_X_sp_dt$region_name_final=tau.multi_fert.forest_X_sp_dt$region_name$NAME_1

NUE_cont <- datasummary(Heading("region_name")*region_name_final~ Heading("N_obs") * N + Heading("percent") * Percent()+ predictions*(Mean+SD),data = tau.multi_fert.forest_X_sp_dt, output="data.frame")

NUE_cont$N_obs=as.numeric(NUE_cont$N_obs)
NUE_cont$Mean=as.numeric(NUE_cont$Mean)
NUE_cont$SD=as.numeric(NUE_cont$SD)

library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('NUE_cont', 'NUE_cont.csv')"
        ),
        reactable(
            NUE_cont,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "NUE_cont"
        )
    )
)
Code
# Remove regions with fewer observations than 50
NUE_cont=subset(NUE_cont,NUE_cont$N_obs> 50)

NUE_cont_sf=merge(TZ_sf,NUE_cont, by.x="NAME_1",by.y="region_name")

# Map the 

library(tmap)
tmap_mode("plot")
tm_shape(NUE_cont_sf) +
  tm_polygons(col = "Mean", title = "Average NUE", style = "quantile") +
  tm_layout(legend.outside = TRUE)

12 Mapping NUEs by year and region

Code
mean_na <- function(x) mean(x, na.rm = TRUE)
sd_na <- function(x) SD(x, na.rm = TRUE)
p25_na <- function(x) quantile(x, probs = 0.25, na.rm = TRUE)
median_na <- function(x) median(x, na.rm = TRUE)
p75_na <- function(x) quantile(x, probs = 0.75, na.rm = TRUE)

NUE_cont_region_year <- datasummary(Factor(year)+Factor(region_name_final)~Heading("N obs") * N + Heading("%") * Percent() + (predictions) * (mean_na + sd_na), data = tau.multi_fert.forest_X_sp_dt, output = "data.frame")

library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('NUE_cont_region_year', 'NUE_cont_region_year.csv')"
        ),
        reactable(
            NUE_cont_region_year,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "NUE_cont_region_year"
        )
    )
)
Code
# Mix
library(data.table)
tau.multi_fert.forest_X_sp_dt <- data.table (tau.multi_fert.forest_X_sp_dt)
tau.multi_fert.forest_X_sp_dt$One=1

NUE_cont_region_year_mix <- tau.multi_fert.forest_X_sp_dt[,.(
  Mean=base::mean(predictions, na.rm=TRUE),
  SD=sd(predictions, na.rm = TRUE),
  N_obs=sum(One, na.rm = TRUE),
  Maize_price_USD=base::mean(maipkg_usd, na.rm=TRUE),
  N_price_USD=base::mean(Npkg_usd, na.rm=TRUE)
  ), by =c("year","region_name_final")]

NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==1]=2008
NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==2]=2010
NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==3]=2012
NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==4]=2014
NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==5]=2020

library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('NUE_cont_region_year_mix', 'NUE_cont_region_year_mix.csv')"
        ),
        reactable(
            NUE_cont_region_year_mix,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "NUE_cont_region_year_mix"
        )
    )
)
Code
NUE_cont_region_year_mix=subset(NUE_cont_region_year_mix,NUE_cont_region_year_mix$N_obs> 10)

NUE_cont_region_year_mix_sf=merge(TZ_sf,NUE_cont_region_year_mix, by.x="NAME_1",by.y="region_name_final")



library(tmap)
tmap_mode("plot")
NUE_year_region <- tm_shape(NUE_cont_region_year_mix_sf) +
    tm_polygons(col = "Mean", title = "NUE (kg Maize/Kg N)", style = "quantile") +
    tm_layout(legend.outside = TRUE) +
    tm_facets("year_det")
NUE_year_region

Code
tmap_save(NUE_year_region, "figures/NUE_year_region.png", width = 600, height = 600, asp = 0)

13 Mapping partial profits by year and region

Code
library(rio)

Maize_Prices_Tanzania_2021_dt_region <-import("admin_data/Maize_Prices_Tanzania_2021_dt_region.csv")

Maize_Prices_Tanzania_2021_dt_region <-subset(Maize_Prices_Tanzania_2021_dt_region,select=c("Region","Maize_max_price_per_kg","Maize_min_price_per_kg"))

Urea_Prices_Tanzania_sp_dt_region <-import ("admin_data/Urea_Prices_Tanzania_sp_dt_region.csv")

Urea_Prices_Tanzania_sp_dt_region <-subset(Urea_Prices_Tanzania_sp_dt_region,select=c("region_name_final","mean_N_price","SD_N_price"))

urea_maize_prices_region <- merge(Maize_Prices_Tanzania_2021_dt_region,Urea_Prices_Tanzania_sp_dt_region,by.x="Region",by.y="region_name_final")

NUE_cont_region_year_mix_sf_prices=merge(NUE_cont_region_year_mix_sf,urea_maize_prices_region,by.x="NAME_1",by.y="Region")

NUE_cont_region_year_mix_sf_prices$maize_rev_from_N = NUE_cont_region_year_mix_sf_prices$Mean*NUE_cont_region_year_mix_sf_prices$Maize_max_price_per_kg

NUE_cont_region_year_mix_sf_prices$Survey_USD_maize_rev_from_N = NUE_cont_region_year_mix_sf_prices$Mean*NUE_cont_region_year_mix_sf_prices$Maize_price_USD

NUE_cont_region_year_mix_sf_prices$partial_profit= NUE_cont_region_year_mix_sf_prices$maize_rev_from_N-NUE_cont_region_year_mix_sf_prices$mean_N_price

NUE_cont_region_year_mix_sf_prices$Survey_USD_partial_profit= NUE_cont_region_year_mix_sf_prices$Survey_USD_maize_rev_from_N-NUE_cont_region_year_mix_sf_prices$N_price_USD

NUE_cont_region_year_mix_sf_prices$vcr=NUE_cont_region_year_mix_sf_prices$maize_rev_from_N/NUE_cont_region_year_mix_sf_prices$mean_N_price

  
NUE_cont_region_year_mix_sf_prices$Survey_USD_vcr=NUE_cont_region_year_mix_sf_prices$Survey_USD_maize_rev_from_N/NUE_cont_region_year_mix_sf_prices$N_price_USD

library(sf)

NUE_cont_region_year_mix_sf_prices_sp <- as_Spatial(NUE_cont_region_year_mix_sf_prices)

NUE_cont_region_year_mix_sf_prices_sp_dt <- NUE_cont_region_year_mix_sf_prices_sp@data

NUE_cont_region_year_mix_sf_prices_sp_dt <-as.data.frame(NUE_cont_region_year_mix_sf_prices_sp_dt)

# Export the table
library(reactable)
library(htmltools)
library(fontawesome)


htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('NUE_cont_region_year_mix_sf_prices_sp_dt', 'NUE_cont_region_year_mix_sf_prices_sp_dt.csv')"
        ),
        reactable(
            NUE_cont_region_year_mix_sf_prices_sp_dt,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "NUE_cont_region_year_mix_sf_prices_sp_dt"
        )
    )
)
Code
# N price 
library(tmap)
tmap_mode("plot")
N_price_year_region <- tm_shape(NUE_cont_region_year_mix_sf_prices) +
    tm_polygons(col = "mean_N_price", title = "Nitrogen price (TZS/kg)", style = "quantile") +
    tm_layout(legend.outside = TRUE) 
N_price_year_region

Code
tmap_save(N_price_year_region, "figures/N_price_year_region.png", width = 600, height = 600)

library(tmap)
tmap_mode("plot")
Survey_USD_N_price_year_region <- tm_shape(NUE_cont_region_year_mix_sf_prices) +
    tm_polygons(col = "N_price_USD", title = "Nitrogen price (USD/kg)", style = "quantile") +
    tm_layout(legend.outside = TRUE) 
Survey_USD_N_price_year_region

Code
tmap_save(Survey_USD_N_price_year_region, "figures/Survey_USD_N_price_year_region.png", width = 600, height = 600)

# Maize output price 
library(tmap)
tmap_mode("plot")
maize_price_year_region <- tm_shape(NUE_cont_region_year_mix_sf_prices) +
    tm_polygons(col = "Maize_max_price_per_kg", title = "2021 maize prices (TZS/kg)", style = "quantile") +
    tm_layout(legend.outside = TRUE) 
maize_price_year_region

Code
tmap_save(maize_price_year_region, "figures/maize_price_year_region.png", width = 600, height = 600)

library(tmap)
tmap_mode("plot")
Survey_USD_maize_price_year_region <- tm_shape(NUE_cont_region_year_mix_sf_prices) +
    tm_polygons(col = "Maize_price_USD", title = "Maize prices (USD/kg)", style = "quantile") +
    tm_layout(legend.outside = TRUE) 
Survey_USD_maize_price_year_region

Code
tmap_save(Survey_USD_maize_price_year_region, "figures/Survey_USD_maize_price_year_region.png", width = 600, height = 600)

# Value cost ratio


library(tmap)
tmap_mode("plot")
vcr_year_region <- tm_shape(NUE_cont_region_year_mix_sf_prices) +
    tm_polygons(col = "vcr", title = "Maize value-cost ratio (VCR)", style = "quantile") +
    tm_layout(legend.outside = TRUE) +
    tm_facets("year_det")
vcr_year_region

Code
tmap_save(vcr_year_region, "figures/vcr_year_region.png", width = 600, height = 600)

library(tmap)
tmap_mode("plot")
Survey_USD_vcr_year_region <- tm_shape(NUE_cont_region_year_mix_sf_prices) +
    tm_polygons(col = "Survey_USD_vcr", title = "Maize value-cost ratio (VCR)", style = "quantile") +
    tm_layout(legend.outside = TRUE) +
    tm_facets("year_det")
Survey_USD_vcr_year_region

Code
tmap_save(Survey_USD_vcr_year_region, "figures/Survey_USD_vcr_year_region.png", width = 600, height = 600)

# Partial profit to nitrogen application

library(tmap)
tmap_mode("plot")
partial_prof_year_region <- tm_shape(NUE_cont_region_year_mix_sf_prices) +
    tm_polygons(col = "partial_profit", title = "Partial profit (TZS /Kg N)", style = "quantile") +
    tm_layout(legend.outside = TRUE) +
    tm_facets("year_det")
partial_prof_year_region

Code
tmap_save(partial_prof_year_region, "figures/partial_prof_year_region.png", width = 600, height = 600)

library(tmap)
tmap_mode("plot")
Survey_USD_partial_prof_year_region <- tm_shape(NUE_cont_region_year_mix_sf_prices) +
    tm_polygons(col = "Survey_USD_partial_profit", title = "Partial profit (USD /Kg N)", style = "quantile") +
    tm_layout(legend.outside = TRUE) +
    tm_facets("year_det")
Survey_USD_partial_prof_year_region

Code
tmap_save(Survey_USD_partial_prof_year_region, "figures/Survey_USD_partial_prof_year_region.png", width = 600, height = 600)

14 Spatially Gridded Profitability Analysis

Code
# Gridded yield gains


lon <- tau.multi_fert.forest_X$lon_modified
lon <- as.data.frame(lon)
lat <- tau.multi_fert.forest_X$lat_modified
lat <- as.data.frame(lat)


tau.multi_fert.forest_X <- cbind(lon, lat, tau.multi_fert.forest_X)

library(sp)
tau.multi_fert.forest_X_sp_kr <-
    SpatialPointsDataFrame(cbind(tau.multi_fert.forest_X$lon, tau.multi_fert.forest_X$lat), data = tau.multi_fert.forest_X, proj4string = CRS("+proj=longlat +datum=WGS84"))

tau.multi_fert.forest_X_sp_kr_2008 =subset(tau.multi_fert.forest_X_sp_kr,tau.multi_fert.forest_X_sp_kr$year==1)

# 2008 prediction 
library(bamlss)
set.seed(111)
f <- predictions ~  s(lon,lat)

## estimate model.
b <- bamlss(f, data = tau.multi_fert.forest_X_sp_kr)
AICc 41271.03 logPost -20732.8 logLik -20613.9 edf 21.560 eps 0.1030 iteration   1
AICc 41174.35 logPost -20648.4 logLik -20557.6 edf 29.453 eps 0.0182 iteration   2
AICc 41173.77 logPost -20648.5 logLik -20556.6 edf 30.150 eps 0.0009 iteration   3
AICc 41173.77 logPost -20648.5 logLik -20556.6 edf 30.151 eps 0.0000 iteration   4
AICc 41173.77 logPost -20648.5 logLik -20556.6 edf 30.151 eps 0.0000 iteration   4
elapsed time:  0.40sec
Starting the sampler...

|                    |   0% 24.99sec
|*                   |   5% 22.42sec  1.18sec
|**                  |  10% 20.16sec  2.24sec
|***                 |  15% 18.81sec  3.32sec
|****                |  20% 18.08sec  4.52sec
|*****               |  25% 17.31sec  5.77sec
|******              |  30% 16.26sec  6.97sec
|*******             |  35% 15.21sec  8.19sec
|********            |  40% 14.16sec  9.44sec
|*********           |  45% 13.07sec 10.69sec
|**********          |  50% 11.88sec 11.88sec
|***********         |  55% 10.79sec 13.19sec
|************        |  60%  9.61sec 14.41sec
|*************       |  65%  8.42sec 15.64sec
|**************      |  70%  7.25sec 16.91sec
|***************     |  75%  6.06sec 18.19sec
|****************    |  80%  4.87sec 19.47sec
|*****************   |  85%  3.66sec 20.74sec
|******************  |  90%  2.44sec 21.97sec
|******************* |  95%  1.22sec 23.25sec
|********************| 100%  0.00sec 24.46sec
Code
plot(b)

Code
# Boundary map
library(sf)
library(geodata)
set.seed(111)

TZ <- gadm(country = "TZ", level = 0, path = tempdir())
plot(TZ)

Code
TZ_sf= st_as_sf(TZ)
TZ_sp=as_Spatial(TZ_sf)

TZ_sf_dis <- st_union(TZ_sf)
plot(TZ_sf_dis)

Code
# Predict
elevationglobal_geodata <- elevation_global(2.5, path =tempdir())

elevationglobal_geodata_aoi <- terra::crop(elevationglobal_geodata,TZ_sf_dis)

library(raster)
elevationglobal_geodata_aoi <- raster(elevationglobal_geodata_aoi)

plot(elevationglobal_geodata_aoi)

Code
pred <- SpatialPoints(elevationglobal_geodata_aoi)@coords
pred <- as.data.frame(pred)

names(pred)[1:2] <- c("lon", "lat")

pred <- pred %>% drop_na()

library(sp)
pred_sp <-
    SpatialPointsDataFrame(cbind(pred$lon, pred$lat), data = pred, proj4string = CRS("+proj=longlat +datum=WGS84"))


yield_gain_hat <- predict(b, newdata = pred)

yield_gain_hat <- as.data.frame(yield_gain_hat)

pred_yield_gain_hat <- cbind(pred, yield_gain_hat )

pred_yield_gain_hat$sigma <- NULL
library(terra)

yield_gain_ras <- rast(pred_yield_gain_hat, type = "xyz")
plot(yield_gain_ras )

Code
library(raster)
yield_gain_ras2 <- raster(yield_gain_ras)

library(sf)
TZ_sf_dis_sp <- as_Spatial(TZ_sf_dis)
yield_gain_ras2_2008 <- mask(yield_gain_ras2, TZ_sf_dis_sp)
plot(yield_gain_ras2_2008, main = "Yield gains to N")

Code
library(rasterVis)

levelplot(yield_gain_ras2_2008, layers = 1, margin = list(FUN = 'median'), contour=TRUE)

Code
## 
library(tmap)
yield_gain_raster_map_2008=tm_shape(yield_gain_ras2_2008) +
  tm_raster(title = "Nitrogen Use Efficiency",style = "cont", palette = "viridis")+
  tm_legend(position = c("right", "top"),legend.title.size=1.2,legend.text.size=1.2)
yield_gain_raster_map_2008

Code
tmap_save(yield_gain_raster_map_2008,"figures/yield_gain_raster_map_2008.png", dpi=300,width=4.88,height = 4.16)


# 2010 prediction 
# tau.multi_fert.forest_X_sp_kr_2010 =subset(tau.multi_fert.forest_X_sp_kr,tau.multi_fert.forest_X_sp_kr$year==1)
# 
# library(bamlss)
# set.seed(111)
# f <- predictions ~  s(lon,lat)
# 
# ## estimate model.
# b <- bamlss(f, data = tau.multi_fert.forest_X_sp_kr_2010)
# 
# plot(b)
# 
# # Boundary map
# library(sf)
# library(geodata)
# set.seed(111)
# 
# TZ <- gadm(country = "TZ", level = 0, path = tempdir())
# plot(TZ)
# 
# TZ_sf= st_as_sf(TZ)
# TZ_sp=as_Spatial(TZ_sf)
# 
# TZ_sf_dis <- st_union(TZ_sf)
# plot(TZ_sf_dis)
# 
# 
# # Predict
# elevationglobal_geodata <- elevation_global(2.5, path =tempdir())
# 
# elevationglobal_geodata_aoi <- terra::crop(elevationglobal_geodata,TZ_sf_dis)
# 
# library(raster)
# elevationglobal_geodata_aoi <- raster(elevationglobal_geodata_aoi)
# 
# plot(elevationglobal_geodata_aoi)
# 
# pred <- SpatialPoints(elevationglobal_geodata_aoi)@coords
# pred <- as.data.frame(pred)
# 
# names(pred)[1:2] <- c("lon", "lat")
# 
# pred <- pred %>% drop_na()
# 
# library(sp)
# pred_sp <-
#     SpatialPointsDataFrame(cbind(pred$lon, pred$lat), data = pred, proj4string = CRS("+proj=longlat +datum=WGS84"))
# 
# 
# yield_gain_hat <- predict(b, newdata = pred)
# 
# yield_gain_hat <- as.data.frame(yield_gain_hat)
# 
# pred_yield_gain_hat <- cbind(pred, yield_gain_hat )
# 
# pred_yield_gain_hat$sigma <- NULL
# library(terra)
# 
# yield_gain_ras <- rast(pred_yield_gain_hat, type = "xyz")
# plot(yield_gain_ras )
# 
# library(raster)
# yield_gain_ras2 <- raster(yield_gain_ras)
# 
# library(sf)
# TZ_sf_dis_sp <- as_Spatial(TZ_sf_dis)
# yield_gain_ras2_2010 <- mask(yield_gain_ras2, TZ_sf_dis_sp)
# plot(yield_gain_ras2_2010, main = "Yield gains to N")
# 
# library(rasterVis)
# 
# levelplot(yield_gain_ras2_2010, layers = 1, margin = list(FUN = 'median'), contour=TRUE)
# 
# ## 
# library(tmap)
# yield_gain_raster_map_2010=tm_shape(yield_gain_ras2_2010) +
#   tm_raster(title = "Nitrogen Use Efficiency",style = "cont", palette = "viridis")+
#   tm_legend(position = c("right", "top"),legend.title.size=1.2,legend.text.size=1.2)
# yield_gain_raster_map_2010
# 
# tmap_save(yield_gain_raster_map_2010,"figures/yield_gain_raster_map_2010.png", dpi=300,width=4.88,height = 4.16)
# 
# # 2012 prediction
# 
# tau.multi_fert.forest_X_sp_kr_2012 =subset(tau.multi_fert.forest_X_sp_kr,tau.multi_fert.forest_X_sp_kr$year==3)
# 
# library(bamlss)
# set.seed(111)
# f <- predictions ~  s(lon,lat)
# 
# ## estimate model.
# b <- bamlss(f, data = tau.multi_fert.forest_X_sp_kr_2012)
# 
# plot(b)
# 
# # Boundary map
# library(sf)
# library(geodata)
# set.seed(111)
# 
# TZ <- gadm(country = "TZ", level = 0, path = tempdir())
# plot(TZ)
# 
# TZ_sf= st_as_sf(TZ)
# TZ_sp=as_Spatial(TZ_sf)
# 
# TZ_sf_dis <- st_union(TZ_sf)
# plot(TZ_sf_dis)
# 
# 
# # Predict
# elevationglobal_geodata <- elevation_global(2.5, path =tempdir())
# 
# elevationglobal_geodata_aoi <- terra::crop(elevationglobal_geodata,TZ_sf_dis)
# 
# library(raster)
# elevationglobal_geodata_aoi <- raster(elevationglobal_geodata_aoi)
# 
# plot(elevationglobal_geodata_aoi)
# 
# pred <- SpatialPoints(elevationglobal_geodata_aoi)@coords
# pred <- as.data.frame(pred)
# 
# names(pred)[1:2] <- c("lon", "lat")
# 
# pred <- pred %>% drop_na()
# 
# library(sp)
# pred_sp <-
#     SpatialPointsDataFrame(cbind(pred$lon, pred$lat), data = pred, proj4string = CRS("+proj=longlat +datum=WGS84"))
# 
# 
# yield_gain_hat <- predict(b, newdata = pred)
# 
# yield_gain_hat <- as.data.frame(yield_gain_hat)
# 
# pred_yield_gain_hat <- cbind(pred, yield_gain_hat )
# 
# pred_yield_gain_hat$sigma <- NULL
# library(terra)
# 
# yield_gain_ras <- rast(pred_yield_gain_hat, type = "xyz")
# plot(yield_gain_ras )
# 
# library(raster)
# yield_gain_ras2 <- raster(yield_gain_ras)
# 
# library(sf)
# TZ_sf_dis_sp <- as_Spatial(TZ_sf_dis)
# yield_gain_ras2_2010 <- mask(yield_gain_ras2, TZ_sf_dis_sp)
# plot(yield_gain_ras2_2010, main = "Yield gains to N")
# 
# library(rasterVis)
# 
# levelplot(yield_gain_ras2_2012, layers = 1, margin = list(FUN = 'median'), contour=TRUE)
# 
# ## 
# library(tmap)
# yield_gain_raster_map_2012=tm_shape(yield_gain_ras2_2010) +
#   tm_raster(title = "Nitrogen Use Efficiency",style = "cont", palette = "viridis")+
#   tm_legend(position = c("right", "top"),legend.title.size=1.2,legend.text.size=1.2)
# yield_gain_raster_map_2010
# 
# tmap_save(yield_gain_raster_map_2012,"figures/yield_gain_raster_map_2012.png", dpi=300,width=4.88,height = 4.16)


# 2014




# 2020